re2 2.0.0.beta1-x64-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/re2/re2.cc ADDED
@@ -0,0 +1,1889 @@
1
+ /*
2
+ * re2 (http://github.com/mudge/re2)
3
+ * Ruby bindings to re2, an "efficient, principled regular expression library"
4
+ *
5
+ * Copyright (c) 2010-2014, Paul Mucur (http://mudge.name)
6
+ * Released under the BSD Licence, please see LICENSE.txt
7
+ */
8
+
9
+ #include <ruby.h>
10
+ #include <re2/re2.h>
11
+ #include <re2/set.h>
12
+ #include <stdint.h>
13
+ #include <string>
14
+ #include <sstream>
15
+ #include <vector>
16
+ using std::string;
17
+ using std::ostringstream;
18
+ using std::nothrow;
19
+ using std::map;
20
+ using std::vector;
21
+
22
/*
 * Convert a C/C++ truth value into Ruby's Qtrue/Qfalse. The argument is
 * parenthesised so that a low-precedence expression (for example a nested
 * conditional) is evaluated as a whole before it is tested.
 */
#define BOOL2RUBY(v) ((v) ? Qtrue : Qfalse)

/* Explicitly mark a parameter or variable as intentionally unused. */
#define UNUSED(x) ((void)(x))

/* Fallback for Ruby headers that do not define RSTRING_LEN. */
#ifndef RSTRING_LEN
#define RSTRING_LEN(x) (RSTRING(x)->len)
#endif

/* Fallback for Ruby headers that do not define RSTRING_PTR. */
#ifndef RSTRING_PTR
#define RSTRING_PTR(x) (RSTRING(x)->ptr)
#endif
32
+
33
+ #ifdef HAVE_RUBY_ENCODING_H
34
+ #include <ruby/encoding.h>
35
+ #define ENCODED_STR_NEW(str, length, encoding) \
36
+ ({ \
37
+ VALUE _string = rb_str_new(str, length); \
38
+ int _enc = rb_enc_find_index(encoding); \
39
+ rb_enc_associate_index(_string, _enc); \
40
+ _string; \
41
+ })
42
+ #define ENCODED_STR_NEW2(str, length, str2) \
43
+ ({ \
44
+ VALUE _string = rb_str_new(str, length); \
45
+ int _enc = rb_enc_get_index(str2); \
46
+ rb_enc_associate_index(_string, _enc); \
47
+ _string; \
48
+ })
49
+ #else
50
+ #define ENCODED_STR_NEW(str, length, encoding) \
51
+ rb_str_new((const char *)str, (long)length)
52
+ #define ENCODED_STR_NEW2(str, length, str2) \
53
+ rb_str_new((const char *)str, (long)length)
54
+ #endif
55
+
56
+ #ifdef HAVE_RB_STR_SUBLEN
57
+ #define ENCODED_STR_SUBLEN(str, offset, encoding) \
58
+ LONG2NUM(rb_str_sublen(str, offset))
59
+ #else
60
+ #ifdef HAVE_RUBY_ENCODING_H
61
+ #define ENCODED_STR_SUBLEN(str, offset, encoding) \
62
+ ({ \
63
+ VALUE _string = ENCODED_STR_NEW(RSTRING_PTR(str), offset, encoding); \
64
+ rb_str_length(_string); \
65
+ })
66
+ #else
67
+ #define ENCODED_STR_SUBLEN(str, offset, encoding) \
68
+ LONG2NUM(offset)
69
+ #endif
70
+ #endif
71
+
72
/*
 * Call RE2::Match on a pattern pointer, hiding whether the installed re2
 * takes an end position argument (HAVE_ENDPOS_ARGUMENT). Without it, the
 * +endpos+ argument is dropped.
 *
 * The pattern argument is parenthesised so that any pointer-valued
 * expression can be passed, and the capture-array parameter is no longer
 * named after the macro itself.
 */
#ifdef HAVE_ENDPOS_ARGUMENT
#define match(pattern, text, startpos, endpos, anchor, submatch, nmatch) \
  ((pattern)->Match((text), (startpos), (endpos), (anchor), (submatch), (nmatch)))
#else
#define match(pattern, text, startpos, endpos, anchor, submatch, nmatch) \
  ((pattern)->Match((text), (startpos), (anchor), (submatch), (nmatch)))
#endif
79
+
80
+ typedef struct {
81
+ RE2 *pattern;
82
+ } re2_pattern;
83
+
84
+ typedef struct {
85
+ re2::StringPiece *matches;
86
+ int number_of_matches;
87
+ VALUE regexp, text;
88
+ } re2_matchdata;
89
+
90
+ typedef struct {
91
+ re2::StringPiece *input;
92
+ int number_of_capturing_groups;
93
+ bool eof;
94
+ VALUE regexp, text;
95
+ } re2_scanner;
96
+
97
+ typedef struct {
98
+ RE2::Set *set;
99
+ } re2_set;
100
+
101
+ VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner, re2_cSet,
102
+ re2_eSetMatchError, re2_eSetUnsupportedError;
103
+
104
+ /* Symbols used in RE2 options. */
105
+ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
106
+ id_max_mem, id_literal, id_never_nl, id_case_sensitive,
107
+ id_perl_classes, id_word_boundary, id_one_line,
108
+ id_unanchored, id_anchor_start, id_anchor_both, id_exception;
109
+
110
+ void parse_re2_options(RE2::Options& re2_options, VALUE options) {
111
+ if (TYPE(options) != T_HASH) {
112
+ rb_raise(rb_eArgError, "options should be a hash");
113
+ }
114
+ VALUE utf8, posix_syntax, longest_match, log_errors,
115
+ max_mem, literal, never_nl, case_sensitive, perl_classes,
116
+ word_boundary, one_line;
117
+
118
+ utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
119
+ if (!NIL_P(utf8)) {
120
+ re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
121
+ }
122
+
123
+ posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
124
+ if (!NIL_P(posix_syntax)) {
125
+ re2_options.set_posix_syntax(RTEST(posix_syntax));
126
+ }
127
+
128
+ longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
129
+ if (!NIL_P(longest_match)) {
130
+ re2_options.set_longest_match(RTEST(longest_match));
131
+ }
132
+
133
+ log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
134
+ if (!NIL_P(log_errors)) {
135
+ re2_options.set_log_errors(RTEST(log_errors));
136
+ }
137
+
138
+ max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
139
+ if (!NIL_P(max_mem)) {
140
+ re2_options.set_max_mem(NUM2INT(max_mem));
141
+ }
142
+
143
+ literal = rb_hash_aref(options, ID2SYM(id_literal));
144
+ if (!NIL_P(literal)) {
145
+ re2_options.set_literal(RTEST(literal));
146
+ }
147
+
148
+ never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
149
+ if (!NIL_P(never_nl)) {
150
+ re2_options.set_never_nl(RTEST(never_nl));
151
+ }
152
+
153
+ case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
154
+ if (!NIL_P(case_sensitive)) {
155
+ re2_options.set_case_sensitive(RTEST(case_sensitive));
156
+ }
157
+
158
+ perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
159
+ if (!NIL_P(perl_classes)) {
160
+ re2_options.set_perl_classes(RTEST(perl_classes));
161
+ }
162
+
163
+ word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
164
+ if (!NIL_P(word_boundary)) {
165
+ re2_options.set_word_boundary(RTEST(word_boundary));
166
+ }
167
+
168
+ one_line = rb_hash_aref(options, ID2SYM(id_one_line));
169
+ if (!NIL_P(one_line)) {
170
+ re2_options.set_one_line(RTEST(one_line));
171
+ }
172
+ }
173
+
174
+ void re2_matchdata_mark(re2_matchdata* self) {
175
+ rb_gc_mark(self->regexp);
176
+ rb_gc_mark(self->text);
177
+ }
178
+
179
+ void re2_matchdata_free(re2_matchdata* self) {
180
+ if (self->matches) {
181
+ delete[] self->matches;
182
+ }
183
+ free(self);
184
+ }
185
+
186
+ void re2_scanner_mark(re2_scanner* self) {
187
+ rb_gc_mark(self->regexp);
188
+ rb_gc_mark(self->text);
189
+ }
190
+
191
+ void re2_scanner_free(re2_scanner* self) {
192
+ if (self->input) {
193
+ delete self->input;
194
+ }
195
+ free(self);
196
+ }
197
+
198
+ void re2_regexp_free(re2_pattern* self) {
199
+ if (self->pattern) {
200
+ delete self->pattern;
201
+ }
202
+ free(self);
203
+ }
204
+
205
+ static VALUE re2_matchdata_allocate(VALUE klass) {
206
+ re2_matchdata *m;
207
+ return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
208
+ re2_matchdata_free, m);
209
+ }
210
+
211
+ static VALUE re2_scanner_allocate(VALUE klass) {
212
+ re2_scanner *c;
213
+ return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
214
+ re2_scanner_free, c);
215
+ }
216
+
217
+ /*
218
+ * Returns a frozen copy of the string passed into +match+.
219
+ *
220
+ * @return [String] a frozen copy of the passed string.
221
+ * @example
222
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
223
+ * m.string #=> "bob 123"
224
+ */
225
+ static VALUE re2_matchdata_string(VALUE self) {
226
+ re2_matchdata *m;
227
+ Data_Get_Struct(self, re2_matchdata, m);
228
+
229
+ return m->text;
230
+ }
231
+
232
+ /*
233
+ * Returns the string passed into the scanner.
234
+ *
235
+ * @return [String] the original string.
236
+ * @example
237
+ * c = RE2::Regexp.new('(\d+)').scan("foo")
238
+ * c.string #=> "foo"
239
+ */
240
+ static VALUE re2_scanner_string(VALUE self) {
241
+ re2_scanner *c;
242
+ Data_Get_Struct(self, re2_scanner, c);
243
+
244
+ return c->text;
245
+ }
246
+
247
+ /*
248
+ * Returns whether the scanner has consumed all input or not.
249
+ *
250
+ * @return [Boolean] whether the scanner has consumed all input or not
251
+ * @example
252
+ * c = RE2::Regexp.new('(\d+)').scan("foo")
253
+ * c.eof? #=> true
254
+ */
255
+ static VALUE re2_scanner_eof(VALUE self) {
256
+ re2_scanner *c;
257
+ Data_Get_Struct(self, re2_scanner, c);
258
+
259
+ return BOOL2RUBY(c->eof);
260
+ }
261
+
262
+ /*
263
+ * Rewind the scanner to the start of the string.
264
+ *
265
+ * @example
266
+ * s = RE2::Regexp.new('(\d+)').scan("1 2 3")
267
+ * e = s.to_enum
268
+ * e.scan #=> ["1"]
269
+ * e.scan #=> ["2"]
270
+ * s.rewind
271
+ * e.scan #=> ["1"]
272
+ */
273
+ static VALUE re2_scanner_rewind(VALUE self) {
274
+ re2_scanner *c;
275
+ Data_Get_Struct(self, re2_scanner, c);
276
+
277
+ c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text));
278
+ c->eof = false;
279
+
280
+ return self;
281
+ }
282
+
283
+ /*
284
+ * Scan the given text incrementally for matches, returning an array of
285
+ * matches on each subsequent call. Returns nil if no matches are found.
286
+ *
287
+ * @return [Array<String>] the matches.
288
+ * @example
289
+ * s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
290
+ * s.scan #=> ["Foo"]
291
+ * s.scan #=> ["bar"]
292
+ */
293
+ static VALUE re2_scanner_scan(VALUE self) {
294
+ int i;
295
+ size_t original_input_size, new_input_size;
296
+ bool input_advanced;
297
+ re2_pattern *p;
298
+ re2_scanner *c;
299
+ VALUE result;
300
+
301
+ Data_Get_Struct(self, re2_scanner, c);
302
+ Data_Get_Struct(c->regexp, re2_pattern, p);
303
+
304
+ vector<RE2::Arg> argv(c->number_of_capturing_groups);
305
+ vector<RE2::Arg*> args(c->number_of_capturing_groups);
306
+ vector<string> matches(c->number_of_capturing_groups);
307
+
308
+ if (c->eof) {
309
+ return Qnil;
310
+ }
311
+
312
+ original_input_size = c->input->size();
313
+
314
+ for (i = 0; i < c->number_of_capturing_groups; i++) {
315
+ matches[i] = "";
316
+ argv[i] = &matches[i];
317
+ args[i] = &argv[i];
318
+ }
319
+
320
+ if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0],
321
+ c->number_of_capturing_groups)) {
322
+ result = rb_ary_new2(c->number_of_capturing_groups);
323
+ new_input_size = c->input->size();
324
+ input_advanced = new_input_size < original_input_size;
325
+
326
+ for (i = 0; i < c->number_of_capturing_groups; i++) {
327
+ if (matches[i].empty()) {
328
+ rb_ary_push(result, Qnil);
329
+ } else {
330
+ rb_ary_push(result, ENCODED_STR_NEW(matches[i].data(),
331
+ matches[i].size(),
332
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
333
+ }
334
+ }
335
+
336
+ /* Check whether we've exhausted the input yet. */
337
+ c->eof = new_input_size == 0;
338
+
339
+ /* If the match didn't advance the input, we need to do this ourselves. */
340
+ if (!input_advanced && new_input_size > 0) {
341
+ c->input->remove_prefix(1);
342
+ }
343
+ } else {
344
+ result = Qnil;
345
+ }
346
+
347
+ return result;
348
+ }
349
+
350
+ /*
351
+ * Retrieve a matchdata by index or name.
352
+ */
353
+ re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
354
+ int id;
355
+ re2_matchdata *m;
356
+ re2_pattern *p;
357
+ map<string, int> groups;
358
+ string name;
359
+ re2::StringPiece *match;
360
+
361
+ Data_Get_Struct(self, re2_matchdata, m);
362
+ Data_Get_Struct(m->regexp, re2_pattern, p);
363
+
364
+ if (FIXNUM_P(idx)) {
365
+ id = FIX2INT(idx);
366
+ } else {
367
+ if (SYMBOL_P(idx)) {
368
+ name = rb_id2name(SYM2ID(idx));
369
+ } else {
370
+ name = StringValuePtr(idx);
371
+ }
372
+
373
+ groups = p->pattern->NamedCapturingGroups();
374
+
375
+ if (groups.count(name) == 1) {
376
+ id = groups[name];
377
+ } else {
378
+ return NULL;
379
+ }
380
+ }
381
+
382
+ if (id >= 0 && id < m->number_of_matches) {
383
+ match = &m->matches[id];
384
+
385
+ if (!match->empty()) {
386
+ return match;
387
+ }
388
+ }
389
+
390
+ return NULL;
391
+ }
392
+
393
+ /*
394
+ * Returns the number of elements in the match array (including nils).
395
+ *
396
+ * @return [Integer] the number of elements
397
+ * @example
398
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
399
+ * m.size #=> 2
400
+ * m.length #=> 2
401
+ */
402
+ static VALUE re2_matchdata_size(VALUE self) {
403
+ re2_matchdata *m;
404
+ Data_Get_Struct(self, re2_matchdata, m);
405
+
406
+ return INT2FIX(m->number_of_matches);
407
+ }
408
+
409
+ /*
410
+ * Returns the offset of the start of the nth element of the matchdata.
411
+ *
412
+ * @param [Integer, String, Symbol] n the name or number of the match
413
+ * @return [Integer] the offset of the start of the match
414
+ * @example
415
+ * m = RE2::Regexp.new('ob (\d+)').match("bob 123")
416
+ * m.begin(0) #=> 1
417
+ * m.begin(1) #=> 4
418
+ */
419
+ static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
420
+ re2_matchdata *m;
421
+ re2_pattern *p;
422
+ re2::StringPiece *match;
423
+ long offset;
424
+
425
+ Data_Get_Struct(self, re2_matchdata, m);
426
+ Data_Get_Struct(m->regexp, re2_pattern, p);
427
+
428
+ match = re2_matchdata_find_match(n, self);
429
+ if (match == NULL) {
430
+ return Qnil;
431
+ } else {
432
+ offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text));
433
+
434
+ return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
435
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
436
+ }
437
+ }
438
+
439
+ /*
440
+ * Returns the offset of the character following the end of the nth element of the matchdata.
441
+ *
442
+ * @param [Integer, String, Symbol] n the name or number of the match
443
+ * @return [Integer] the offset of the character following the end of the match
444
+ * @example
445
+ * m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob")
446
+ * m.end(0) #=> 9
447
+ * m.end(1) #=> 7
448
+ */
449
+ static VALUE re2_matchdata_end(VALUE self, VALUE n) {
450
+ re2_matchdata *m;
451
+ re2_pattern *p;
452
+ re2::StringPiece *match;
453
+ long offset;
454
+
455
+ Data_Get_Struct(self, re2_matchdata, m);
456
+ Data_Get_Struct(m->regexp, re2_pattern, p);
457
+
458
+ match = re2_matchdata_find_match(n, self);
459
+
460
+ if (match == NULL) {
461
+ return Qnil;
462
+ } else {
463
+ offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text)) + match->size();
464
+
465
+ return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
466
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
467
+ }
468
+ }
469
+
470
+ /*
471
+ * Returns the {RE2::Regexp} used in the match.
472
+ *
473
+ * @return [RE2::Regexp] the regexp used in the match
474
+ * @example
475
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
476
+ * m.regexp #=> #<RE2::Regexp /(\d+)/>
477
+ */
478
+ static VALUE re2_matchdata_regexp(VALUE self) {
479
+ re2_matchdata *m;
480
+ Data_Get_Struct(self, re2_matchdata, m);
481
+ return m->regexp;
482
+ }
483
+
484
+ /*
485
+ * Returns the {RE2::Regexp} used in the scanner.
486
+ *
487
+ * @return [RE2::Regexp] the regexp used in the scanner
488
+ * @example
489
+ * c = RE2::Regexp.new('(\d+)').scan("bob 123")
490
+ * c.regexp #=> #<RE2::Regexp /(\d+)/>
491
+ */
492
+ static VALUE re2_scanner_regexp(VALUE self) {
493
+ re2_scanner *c;
494
+ Data_Get_Struct(self, re2_scanner, c);
495
+
496
+ return c->regexp;
497
+ }
498
+
499
+ static VALUE re2_regexp_allocate(VALUE klass) {
500
+ re2_pattern *p;
501
+ return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
502
+ }
503
+
504
+ /*
505
+ * Returns the array of matches.
506
+ *
507
+ * @return [Array<String, nil>] the array of matches
508
+ * @example
509
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
510
+ * m.to_a #=> ["123", "123"]
511
+ */
512
+ static VALUE re2_matchdata_to_a(VALUE self) {
513
+ int i;
514
+ re2_matchdata *m;
515
+ re2_pattern *p;
516
+ re2::StringPiece *match;
517
+ VALUE array;
518
+
519
+ Data_Get_Struct(self, re2_matchdata, m);
520
+ Data_Get_Struct(m->regexp, re2_pattern, p);
521
+
522
+ array = rb_ary_new2(m->number_of_matches);
523
+ for (i = 0; i < m->number_of_matches; i++) {
524
+ match = &m->matches[i];
525
+
526
+ if (match->empty()) {
527
+ rb_ary_push(array, Qnil);
528
+ } else {
529
+ rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
530
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
531
+ }
532
+ }
533
+
534
+ return array;
535
+ }
536
+
537
+ static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
538
+ re2_matchdata *m;
539
+ re2_pattern *p;
540
+ re2::StringPiece *match;
541
+
542
+ Data_Get_Struct(self, re2_matchdata, m);
543
+ Data_Get_Struct(m->regexp, re2_pattern, p);
544
+
545
+ if (nth < 0 || nth >= m->number_of_matches) {
546
+ return Qnil;
547
+ } else {
548
+ match = &m->matches[nth];
549
+
550
+ if (match->empty()) {
551
+ return Qnil;
552
+ } else {
553
+ return ENCODED_STR_NEW(match->data(), match->size(),
554
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
555
+ }
556
+ }
557
+ }
558
+
559
+ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
560
+ int idx;
561
+ re2_matchdata *m;
562
+ re2_pattern *p;
563
+ map<string, int> groups;
564
+ string name_as_string(name);
565
+
566
+ Data_Get_Struct(self, re2_matchdata, m);
567
+ Data_Get_Struct(m->regexp, re2_pattern, p);
568
+
569
+ groups = p->pattern->NamedCapturingGroups();
570
+
571
+ if (groups.count(name_as_string) == 1) {
572
+ idx = groups[name_as_string];
573
+ return re2_matchdata_nth_match(idx, self);
574
+ } else {
575
+ return Qnil;
576
+ }
577
+ }
578
+
579
+ /*
580
+ * Retrieve zero, one or more matches by index or name.
581
+ *
582
+ * @return [Array<String, nil>, String, Boolean]
583
+ *
584
+ * @overload [](index)
585
+ * Access a particular match by index.
586
+ *
587
+ * @param [Integer] index the index of the match to fetch
588
+ * @return [String, nil] the specified match
589
+ * @example
590
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
591
+ * m[0] #=> "123"
592
+ *
593
+ * @overload [](start, length)
594
+ * Access a range of matches by starting index and length.
595
+ *
596
+ * @param [Integer] start the index from which to start
597
+ * @param [Integer] length the number of elements to fetch
598
+ * @return [Array<String, nil>] the specified matches
599
+ * @example
600
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
601
+ * m[0, 1] #=> ["123"]
602
+ *
603
+ * @overload [](range)
604
+ * Access a range of matches by index.
605
+ *
606
+ * @param [Range] range the range of match indexes to fetch
607
+ * @return [Array<String, nil>] the specified matches
608
+ * @example
609
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
610
+ * m[0..1] #=> "[123", "123"]
611
+ *
612
+ * @overload [](name)
613
+ * Access a particular match by name.
614
+ *
615
+ * @param [String, Symbol] name the name of the match to fetch
616
+ * @return [String, nil] the specific match
617
+ * @example
618
+ * m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
619
+ * m["number"] #=> "123"
620
+ * m[:number] #=> "123"
621
+ */
622
+ static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
623
+ VALUE idx, rest;
624
+ rb_scan_args(argc, argv, "11", &idx, &rest);
625
+
626
+ if (TYPE(idx) == T_STRING) {
627
+ return re2_matchdata_named_match(StringValuePtr(idx), self);
628
+ } else if (SYMBOL_P(idx)) {
629
+ return re2_matchdata_named_match(rb_id2name(SYM2ID(idx)), self);
630
+ } else if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) {
631
+ return rb_ary_aref(argc, argv, re2_matchdata_to_a(self));
632
+ } else {
633
+ return re2_matchdata_nth_match(FIX2INT(idx), self);
634
+ }
635
+ }
636
+
637
+ /*
638
+ * Returns the entire matched string.
639
+ *
640
+ * @return [String] the entire matched string
641
+ */
642
+ static VALUE re2_matchdata_to_s(VALUE self) {
643
+ return re2_matchdata_nth_match(0, self);
644
+ }
645
+
646
+ /*
647
+ * Returns a printable version of the match.
648
+ *
649
+ * @return [String] a printable version of the match
650
+ * @example
651
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
652
+ * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
653
+ */
654
+ static VALUE re2_matchdata_inspect(VALUE self) {
655
+ int i;
656
+ re2_matchdata *m;
657
+ re2_pattern *p;
658
+ VALUE match, result;
659
+ ostringstream output;
660
+
661
+ Data_Get_Struct(self, re2_matchdata, m);
662
+ Data_Get_Struct(m->regexp, re2_pattern, p);
663
+
664
+ output << "#<RE2::MatchData";
665
+
666
+ for (i = 0; i < m->number_of_matches; i++) {
667
+ output << " ";
668
+
669
+ if (i > 0) {
670
+ output << i << ":";
671
+ }
672
+
673
+ match = re2_matchdata_nth_match(i, self);
674
+
675
+ if (match == Qnil) {
676
+ output << "nil";
677
+ } else {
678
+ output << "\"" << StringValuePtr(match) << "\"";
679
+ }
680
+ }
681
+
682
+ output << ">";
683
+
684
+ result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
685
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
686
+
687
+ return result;
688
+ }
689
+
690
+ /*
691
+ * Returns the array of submatches for pattern matching.
692
+ *
693
+ * @return [Array<String, nil>] the array of submatches
694
+ * @example
695
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
696
+ * m.deconstruct #=> ["123"]
697
+ *
698
+ * @example pattern matching
699
+ * case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
700
+ * in x, y
701
+ * puts "Matched #{x} #{y}"
702
+ * else
703
+ * puts "Unrecognised match"
704
+ * end
705
+ */
706
+ static VALUE re2_matchdata_deconstruct(VALUE self) {
707
+ int i;
708
+ re2_matchdata *m;
709
+ re2_pattern *p;
710
+ re2::StringPiece *match;
711
+ VALUE array;
712
+
713
+ Data_Get_Struct(self, re2_matchdata, m);
714
+ Data_Get_Struct(m->regexp, re2_pattern, p);
715
+
716
+ array = rb_ary_new2(m->number_of_matches - 1);
717
+ for (i = 1; i < m->number_of_matches; i++) {
718
+ match = &m->matches[i];
719
+
720
+ if (match->empty()) {
721
+ rb_ary_push(array, Qnil);
722
+ } else {
723
+ rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
724
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
725
+ }
726
+ }
727
+
728
+ return array;
729
+ }
730
+
731
+ /*
732
+ * Returns a hash of capturing group names to submatches for pattern matching.
733
+ *
734
+ * As this is used by Ruby's pattern matching, it will return an empty hash if given
735
+ * more keys than there are capturing groups. Given keys will populate the hash in
736
+ * order but an invalid name will cause the hash to be immediately returned.
737
+ *
738
+ * @return [Hash] a hash of capturing group names to submatches
739
+ * @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
740
+ * @example
741
+ * m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
742
+ * m.deconstruct_keys(nil) #=> {:numbers => "123", :letters => "abc"}
743
+ * m.deconstruct_keys([:numbers]) #=> {:numbers => "123"}
744
+ * m.deconstruct_keys([:fruit]) #=> {}
745
+ * m.deconstruct_keys([:letters, :fruit]) #=> {:letters => "abc"}
746
+ *
747
+ * @example pattern matching
748
+ * case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
749
+ * in numbers:, letters:
750
+ * puts "Numbers: #{numbers}, letters: #{letters}"
751
+ * else
752
+ * puts "Unrecognised match"
753
+ * end
754
+ */
755
+ static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
756
+ int i;
757
+ VALUE capturing_groups, key;
758
+ re2_matchdata *m;
759
+ re2_pattern *p;
760
+ map<string, int> groups;
761
+ map<string, int>::iterator iterator;
762
+
763
+ Data_Get_Struct(self, re2_matchdata, m);
764
+ Data_Get_Struct(m->regexp, re2_pattern, p);
765
+
766
+ groups = p->pattern->NamedCapturingGroups();
767
+ capturing_groups = rb_hash_new();
768
+
769
+ if (NIL_P(keys)) {
770
+ for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
771
+ rb_hash_aset(capturing_groups,
772
+ ID2SYM(rb_intern(iterator->first.data())),
773
+ re2_matchdata_nth_match(iterator->second, self));
774
+ }
775
+ } else {
776
+ Check_Type(keys, T_ARRAY);
777
+
778
+ if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
779
+ for (i = 0; i < RARRAY_LEN(keys); i++) {
780
+ key = rb_ary_entry(keys, i);
781
+ Check_Type(key, T_SYMBOL);
782
+ string name(rb_id2name(SYM2ID(key)));
783
+
784
+ if (groups.count(name) == 0) {
785
+ break;
786
+ }
787
+
788
+ rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(groups[name], self));
789
+ }
790
+ }
791
+ }
792
+
793
+ return capturing_groups;
794
+ }
795
+
796
+ /*
797
+ * Returns a new RE2 object with a compiled version of
798
+ * +pattern+ stored inside. Equivalent to +RE2::Regexp.new+.
799
+ *
800
+ * @see RE2::Regexp#initialize
801
+ *
802
+ */
803
+ static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
804
+ UNUSED(self);
805
+ return rb_class_new_instance(argc, argv, re2_cRegexp);
806
+ }
807
+
808
+ /*
809
+ * Returns a new {RE2::Regexp} object with a compiled version of
810
+ * +pattern+ stored inside.
811
+ *
812
+ * @return [RE2::Regexp]
813
+ *
814
+ * @overload initialize(pattern)
815
+ * Returns a new {RE2::Regexp} object with a compiled version of
816
+ * +pattern+ stored inside with the default options.
817
+ *
818
+ * @param [String] pattern the pattern to compile
819
+ * @return [RE2::Regexp] an RE2::Regexp with the specified pattern
820
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled
821
+ * pattern
822
+ *
823
+ * @overload initialize(pattern, options)
824
+ * Returns a new {RE2::Regexp} object with a compiled version of
825
+ * +pattern+ stored inside with the specified options.
826
+ *
827
+ * @param [String] pattern the pattern to compile
828
+ * @param [Hash] options the options with which to compile the pattern
829
+ * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
830
+ * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
831
+ * @option options [Boolean] :longest_match (false) search for longest match, not first match
832
+ * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
833
+ * @option options [Integer] :max_mem approx. max memory footprint of RE2
834
+ * @option options [Boolean] :literal (false) interpret string as literal, not regexp
835
+ * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
836
+ * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
837
+ * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
838
+ * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
839
+ * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
840
+ * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
841
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
842
+ */
843
+ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
844
+ VALUE pattern, options;
845
+ re2_pattern *p;
846
+
847
+ rb_scan_args(argc, argv, "11", &pattern, &options);
848
+ Data_Get_Struct(self, re2_pattern, p);
849
+
850
+ if (RTEST(options)) {
851
+ RE2::Options re2_options;
852
+ parse_re2_options(re2_options, options);
853
+
854
+ p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
855
+ } else {
856
+ p->pattern = new(nothrow) RE2(StringValuePtr(pattern));
857
+ }
858
+
859
+ if (p->pattern == 0) {
860
+ rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
861
+ }
862
+
863
+ return self;
864
+ }
865
+
866
+ /*
867
+ * Returns a printable version of the regular expression +re2+.
868
+ *
869
+ * @return [String] a printable version of the regular expression
870
+ * @example
871
+ * re2 = RE2::Regexp.new("woo?")
872
+ * re2.inspect #=> "#<RE2::Regexp /woo?/>"
873
+ */
874
+ static VALUE re2_regexp_inspect(VALUE self) {
875
+ re2_pattern *p;
876
+ VALUE result;
877
+ ostringstream output;
878
+
879
+ Data_Get_Struct(self, re2_pattern, p);
880
+
881
+ output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
882
+
883
+ result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
884
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
885
+
886
+ return result;
887
+ }
888
+
889
+ /*
890
+ * Returns a string version of the regular expression +re2+.
891
+ *
892
+ * @return [String] a string version of the regular expression
893
+ * @example
894
+ * re2 = RE2::Regexp.new("woo?")
895
+ * re2.to_s #=> "woo?"
896
+ */
897
+ static VALUE re2_regexp_to_s(VALUE self) {
898
+ re2_pattern *p;
899
+ Data_Get_Struct(self, re2_pattern, p);
900
+ return ENCODED_STR_NEW(p->pattern->pattern().data(),
901
+ p->pattern->pattern().size(),
902
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
903
+ }
904
+
905
+ /*
906
+ * Returns whether or not the regular expression +re2+
907
+ * was compiled successfully or not.
908
+ *
909
+ * @return [Boolean] whether or not compilation was successful
910
+ * @example
911
+ * re2 = RE2::Regexp.new("woo?")
912
+ * re2.ok? #=> true
913
+ */
914
+ static VALUE re2_regexp_ok(VALUE self) {
915
+ re2_pattern *p;
916
+ Data_Get_Struct(self, re2_pattern, p);
917
+ return BOOL2RUBY(p->pattern->ok());
918
+ }
919
+
920
+ /*
921
+ * Returns whether or not the regular expression +re2+
922
+ * was compiled with the utf8 option set to true.
923
+ *
924
+ * @return [Boolean] the utf8 option
925
+ * @example
926
+ * re2 = RE2::Regexp.new("woo?", :utf8 => true)
927
+ * re2.utf8? #=> true
928
+ */
929
+ static VALUE re2_regexp_utf8(VALUE self) {
930
+ re2_pattern *p;
931
+ Data_Get_Struct(self, re2_pattern, p);
932
+ return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
933
+ }
934
+
935
+ /*
936
+ * Returns whether or not the regular expression +re2+
937
+ * was compiled with the posix_syntax option set to true.
938
+ *
939
+ * @return [Boolean] the posix_syntax option
940
+ * @example
941
+ * re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
942
+ * re2.posix_syntax? #=> true
943
+ */
944
+ static VALUE re2_regexp_posix_syntax(VALUE self) {
945
+ re2_pattern *p;
946
+ Data_Get_Struct(self, re2_pattern, p);
947
+ return BOOL2RUBY(p->pattern->options().posix_syntax());
948
+ }
949
+
950
+ /*
951
+ * Returns whether or not the regular expression +re2+
952
+ * was compiled with the longest_match option set to true.
953
+ *
954
+ * @return [Boolean] the longest_match option
955
+ * @example
956
+ * re2 = RE2::Regexp.new("woo?", :longest_match => true)
957
+ * re2.longest_match? #=> true
958
+ */
959
+ static VALUE re2_regexp_longest_match(VALUE self) {
960
+ re2_pattern *p;
961
+ Data_Get_Struct(self, re2_pattern, p);
962
+ return BOOL2RUBY(p->pattern->options().longest_match());
963
+ }
964
+
965
+ /*
966
+ * Returns whether or not the regular expression +re2+
967
+ * was compiled with the log_errors option set to true.
968
+ *
969
+ * @return [Boolean] the log_errors option
970
+ * @example
971
+ * re2 = RE2::Regexp.new("woo?", :log_errors => true)
972
+ * re2.log_errors? #=> true
973
+ */
974
+ static VALUE re2_regexp_log_errors(VALUE self) {
975
+ re2_pattern *p;
976
+ Data_Get_Struct(self, re2_pattern, p);
977
+ return BOOL2RUBY(p->pattern->options().log_errors());
978
+ }
979
+
980
+ /*
981
+ * Returns the max_mem setting for the regular expression
982
+ * +re2+.
983
+ *
984
+ * @return [Integer] the max_mem option
985
+ * @example
986
+ * re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
987
+ * re2.max_mem #=> 1024
988
+ */
989
+ static VALUE re2_regexp_max_mem(VALUE self) {
990
+ re2_pattern *p;
991
+ Data_Get_Struct(self, re2_pattern, p);
992
+ return INT2FIX(p->pattern->options().max_mem());
993
+ }
994
+
995
+ /*
996
+ * Returns whether or not the regular expression +re2+
997
+ * was compiled with the literal option set to true.
998
+ *
999
+ * @return [Boolean] the literal option
1000
+ * @example
1001
+ * re2 = RE2::Regexp.new("woo?", :literal => true)
1002
+ * re2.literal? #=> true
1003
+ */
1004
+ static VALUE re2_regexp_literal(VALUE self) {
1005
+ re2_pattern *p;
1006
+ Data_Get_Struct(self, re2_pattern, p);
1007
+ return BOOL2RUBY(p->pattern->options().literal());
1008
+ }
1009
+
1010
+ /*
1011
+ * Returns whether or not the regular expression +re2+
1012
+ * was compiled with the never_nl option set to true.
1013
+ *
1014
+ * @return [Boolean] the never_nl option
1015
+ * @example
1016
+ * re2 = RE2::Regexp.new("woo?", :never_nl => true)
1017
+ * re2.never_nl? #=> true
1018
+ */
1019
+ static VALUE re2_regexp_never_nl(VALUE self) {
1020
+ re2_pattern *p;
1021
+ Data_Get_Struct(self, re2_pattern, p);
1022
+ return BOOL2RUBY(p->pattern->options().never_nl());
1023
+ }
1024
+
1025
+ /*
1026
+ * Returns whether or not the regular expression +re2+
1027
+ * was compiled with the case_sensitive option set to true.
1028
+ *
1029
+ * @return [Boolean] the case_sensitive option
1030
+ * @example
1031
+ * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
1032
+ * re2.case_sensitive? #=> true
1033
+ */
1034
+ static VALUE re2_regexp_case_sensitive(VALUE self) {
1035
+ re2_pattern *p;
1036
+ Data_Get_Struct(self, re2_pattern, p);
1037
+ return BOOL2RUBY(p->pattern->options().case_sensitive());
1038
+ }
1039
+
1040
+ /*
1041
+ * Returns whether or not the regular expression +re2+
1042
+ * was compiled with the case_sensitive option set to false.
1043
+ *
1044
+ * @return [Boolean] the inverse of the case_sensitive option
1045
+ * @example
1046
+ * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
1047
+ * re2.case_insensitive? #=> false
1048
+ * re2.casefold? #=> false
1049
+ */
1050
+ static VALUE re2_regexp_case_insensitive(VALUE self) {
1051
+ return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
1052
+ }
1053
+
1054
+ /*
1055
+ * Returns whether or not the regular expression +re2+
1056
+ * was compiled with the perl_classes option set to true.
1057
+ *
1058
+ * @return [Boolean] the perl_classes option
1059
+ * @example
1060
+ * re2 = RE2::Regexp.new("woo?", :perl_classes => true)
1061
+ * re2.perl_classes? #=> true
1062
+ */
1063
+ static VALUE re2_regexp_perl_classes(VALUE self) {
1064
+ re2_pattern *p;
1065
+ Data_Get_Struct(self, re2_pattern, p);
1066
+ return BOOL2RUBY(p->pattern->options().perl_classes());
1067
+ }
1068
+
1069
+ /*
1070
+ * Returns whether or not the regular expression +re2+
1071
+ * was compiled with the word_boundary option set to true.
1072
+ *
1073
+ * @return [Boolean] the word_boundary option
1074
+ * @example
1075
+ * re2 = RE2::Regexp.new("woo?", :word_boundary => true)
1076
+ * re2.word_boundary? #=> true
1077
+ */
1078
+ static VALUE re2_regexp_word_boundary(VALUE self) {
1079
+ re2_pattern *p;
1080
+ Data_Get_Struct(self, re2_pattern, p);
1081
+ return BOOL2RUBY(p->pattern->options().word_boundary());
1082
+ }
1083
+
1084
+ /*
1085
+ * Returns whether or not the regular expression +re2+
1086
+ * was compiled with the one_line option set to true.
1087
+ *
1088
+ * @return [Boolean] the one_line option
1089
+ * @example
1090
+ * re2 = RE2::Regexp.new("woo?", :one_line => true)
1091
+ * re2.one_line? #=> true
1092
+ */
1093
+ static VALUE re2_regexp_one_line(VALUE self) {
1094
+ re2_pattern *p;
1095
+ Data_Get_Struct(self, re2_pattern, p);
1096
+ return BOOL2RUBY(p->pattern->options().one_line());
1097
+ }
1098
+
1099
+ /*
1100
+ * If the RE2 could not be created properly, returns an
1101
+ * error string otherwise returns nil.
1102
+ *
1103
+ * @return [String, nil] the error string or nil
1104
+ */
1105
+ static VALUE re2_regexp_error(VALUE self) {
1106
+ re2_pattern *p;
1107
+ Data_Get_Struct(self, re2_pattern, p);
1108
+ if (p->pattern->ok()) {
1109
+ return Qnil;
1110
+ } else {
1111
+ return rb_str_new(p->pattern->error().data(), p->pattern->error().size());
1112
+ }
1113
+ }
1114
+
1115
+ /*
1116
+ * If the RE2 could not be created properly, returns
1117
+ * the offending portion of the regexp otherwise returns nil.
1118
+ *
1119
+ * @return [String, nil] the offending portion of the regexp or nil
1120
+ */
1121
+ static VALUE re2_regexp_error_arg(VALUE self) {
1122
+ re2_pattern *p;
1123
+ Data_Get_Struct(self, re2_pattern, p);
1124
+ if (p->pattern->ok()) {
1125
+ return Qnil;
1126
+ } else {
1127
+ return ENCODED_STR_NEW(p->pattern->error_arg().data(),
1128
+ p->pattern->error_arg().size(),
1129
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1130
+ }
1131
+ }
1132
+
1133
+ /*
1134
+ * Returns the program size, a very approximate measure
1135
+ * of a regexp's "cost". Larger numbers are more expensive
1136
+ * than smaller numbers.
1137
+ *
1138
+ * @return [Integer] the regexp "cost"
1139
+ */
1140
+ static VALUE re2_regexp_program_size(VALUE self) {
1141
+ re2_pattern *p;
1142
+ Data_Get_Struct(self, re2_pattern, p);
1143
+ return INT2FIX(p->pattern->ProgramSize());
1144
+ }
1145
+
1146
+ /*
1147
+ * Returns a hash of the options currently set for
1148
+ * +re2+.
1149
+ *
1150
+ * @return [Hash] the options
1151
+ */
1152
+ static VALUE re2_regexp_options(VALUE self) {
1153
+ VALUE options;
1154
+ re2_pattern *p;
1155
+
1156
+ Data_Get_Struct(self, re2_pattern, p);
1157
+ options = rb_hash_new();
1158
+
1159
+ rb_hash_aset(options, ID2SYM(id_utf8),
1160
+ BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8));
1161
+
1162
+ rb_hash_aset(options, ID2SYM(id_posix_syntax),
1163
+ BOOL2RUBY(p->pattern->options().posix_syntax()));
1164
+
1165
+ rb_hash_aset(options, ID2SYM(id_longest_match),
1166
+ BOOL2RUBY(p->pattern->options().longest_match()));
1167
+
1168
+ rb_hash_aset(options, ID2SYM(id_log_errors),
1169
+ BOOL2RUBY(p->pattern->options().log_errors()));
1170
+
1171
+ rb_hash_aset(options, ID2SYM(id_max_mem),
1172
+ INT2FIX(p->pattern->options().max_mem()));
1173
+
1174
+ rb_hash_aset(options, ID2SYM(id_literal),
1175
+ BOOL2RUBY(p->pattern->options().literal()));
1176
+
1177
+ rb_hash_aset(options, ID2SYM(id_never_nl),
1178
+ BOOL2RUBY(p->pattern->options().never_nl()));
1179
+
1180
+ rb_hash_aset(options, ID2SYM(id_case_sensitive),
1181
+ BOOL2RUBY(p->pattern->options().case_sensitive()));
1182
+
1183
+ rb_hash_aset(options, ID2SYM(id_perl_classes),
1184
+ BOOL2RUBY(p->pattern->options().perl_classes()));
1185
+
1186
+ rb_hash_aset(options, ID2SYM(id_word_boundary),
1187
+ BOOL2RUBY(p->pattern->options().word_boundary()));
1188
+
1189
+ rb_hash_aset(options, ID2SYM(id_one_line),
1190
+ BOOL2RUBY(p->pattern->options().one_line()));
1191
+
1192
+ /* This is a read-only hash after all... */
1193
+ rb_obj_freeze(options);
1194
+
1195
+ return options;
1196
+ }
1197
+
1198
+ /*
1199
+ * Returns the number of capturing subpatterns, or -1 if the regexp
1200
+ * wasn't valid on construction. The overall match ($0) does not
1201
+ * count: if the regexp is "(a)(b)", returns 2.
1202
+ *
1203
+ * @return [Integer] the number of capturing subpatterns
1204
+ */
1205
+ static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
1206
+ re2_pattern *p;
1207
+
1208
+ Data_Get_Struct(self, re2_pattern, p);
1209
+ return INT2FIX(p->pattern->NumberOfCapturingGroups());
1210
+ }
1211
+
1212
+ /*
1213
+ * Returns a hash of names to capturing indices of groups.
1214
+ *
1215
+ * @return [Hash] a hash of names to capturing indices
1216
+ */
1217
+ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
1218
+ VALUE capturing_groups;
1219
+ re2_pattern *p;
1220
+ map<string, int> groups;
1221
+ map<string, int>::iterator iterator;
1222
+
1223
+ Data_Get_Struct(self, re2_pattern, p);
1224
+ groups = p->pattern->NamedCapturingGroups();
1225
+ capturing_groups = rb_hash_new();
1226
+
1227
+ for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
1228
+ rb_hash_aset(capturing_groups,
1229
+ ENCODED_STR_NEW(iterator->first.data(), iterator->first.size(),
1230
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"),
1231
+ INT2FIX(iterator->second));
1232
+ }
1233
+
1234
+ return capturing_groups;
1235
+ }
1236
+
1237
+ /*
1238
+ * Match the pattern against the given +text+ and return either
1239
+ * a boolean (if no submatches are required) or a {RE2::MatchData}
1240
+ * instance.
1241
+ *
1242
+ * @return [Boolean, RE2::MatchData]
1243
+ *
1244
+ * @overload match(text)
1245
+ * Returns an {RE2::MatchData} containing the matching
1246
+ * pattern and all subpatterns resulting from looking for
1247
+ * the regexp in +text+.
1248
+ *
1249
+ * @param [String] text the text to search
1250
+ * @return [RE2::MatchData] the matches
1251
+ * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1252
+ * @example
1253
+ * r = RE2::Regexp.new('w(o)(o)')
1254
+ * r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
1255
+ *
1256
+ * @overload match(text, 0)
1257
+ * Returns either true or false indicating whether a
1258
+ * successful match was made.
1259
+ *
1260
+ * @param [String] text the text to search
1261
+ * @return [Boolean] whether the match was successful
1262
+ * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1263
+ * @example
1264
+ * r = RE2::Regexp.new('w(o)(o)')
1265
+ * r.match('woo', 0) #=> true
1266
+ * r.match('bob', 0) #=> false
1267
+ *
1268
+ * @overload match(text, number_of_matches)
1269
+ * See +match(text)+ but with a specific number of
1270
+ * matches returned (padded with nils if necessary).
1271
+ *
1272
+ * @param [String] text the text to search
1273
+ * @param [Integer] number_of_matches the number of matches to return
1274
+ * @return [RE2::MatchData] the matches
1275
+ * @raise [ArgumentError] if given a negative number of matches
1276
+ * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1277
+ * @example
1278
+ * r = RE2::Regexp.new('w(o)(o)')
1279
+ * r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
1280
+ * r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
1281
+ */
1282
+ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1283
+ int n;
1284
+ bool matched;
1285
+ re2_pattern *p;
1286
+ re2_matchdata *m;
1287
+ VALUE text, number_of_matches, matchdata;
1288
+
1289
+ rb_scan_args(argc, argv, "11", &text, &number_of_matches);
1290
+
1291
+ /* Ensure text is a string. */
1292
+ text = StringValue(text);
1293
+
1294
+ Data_Get_Struct(self, re2_pattern, p);
1295
+
1296
+ if (RTEST(number_of_matches)) {
1297
+ n = NUM2INT(number_of_matches);
1298
+
1299
+ if (n < 0) {
1300
+ rb_raise(rb_eArgError, "number of matches should be >= 0");
1301
+ }
1302
+ } else {
1303
+ if (!p->pattern->ok()) {
1304
+ return Qnil;
1305
+ }
1306
+
1307
+ n = p->pattern->NumberOfCapturingGroups();
1308
+ }
1309
+
1310
+ if (n == 0) {
1311
+ matched = match(p->pattern, StringValuePtr(text), 0,
1312
+ static_cast<int>(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0);
1313
+ return BOOL2RUBY(matched);
1314
+ } else {
1315
+
1316
+ /* Because match returns the whole match as well. */
1317
+ n += 1;
1318
+
1319
+ matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1320
+ Data_Get_Struct(matchdata, re2_matchdata, m);
1321
+ m->matches = new(nothrow) re2::StringPiece[n];
1322
+ m->regexp = self;
1323
+ m->text = rb_str_dup(text);
1324
+ rb_str_freeze(m->text);
1325
+
1326
+ if (m->matches == 0) {
1327
+ rb_raise(rb_eNoMemError,
1328
+ "not enough memory to allocate StringPieces for matches");
1329
+ }
1330
+
1331
+ m->number_of_matches = n;
1332
+
1333
+ matched = match(p->pattern, StringValuePtr(m->text), 0,
1334
+ static_cast<int>(RSTRING_LEN(m->text)),
1335
+ RE2::UNANCHORED, m->matches, n);
1336
+
1337
+ if (matched) {
1338
+ return matchdata;
1339
+ } else {
1340
+ return Qnil;
1341
+ }
1342
+ }
1343
+ }
1344
+
1345
+ /*
1346
+ * Returns true or false to indicate a successful match.
1347
+ * Equivalent to +re2.match(text, 0)+.
1348
+ *
1349
+ * @return [Boolean] whether the match was successful
1350
+ */
1351
+ static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
1352
+ VALUE argv[2];
1353
+ argv[0] = text;
1354
+ argv[1] = INT2FIX(0);
1355
+
1356
+ return re2_regexp_match(2, argv, self);
1357
+ }
1358
+
1359
+ /*
1360
+ * Returns a {RE2::Scanner} for scanning the given text incrementally.
1361
+ *
1362
+ * @example
1363
+ * c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
1364
+ */
1365
+ static VALUE re2_regexp_scan(VALUE self, VALUE text) {
1366
+ re2_pattern *p;
1367
+ re2_scanner *c;
1368
+ VALUE scanner;
1369
+
1370
+ Data_Get_Struct(self, re2_pattern, p);
1371
+ scanner = rb_class_new_instance(0, 0, re2_cScanner);
1372
+ Data_Get_Struct(scanner, re2_scanner, c);
1373
+
1374
+ c->input = new(nothrow) re2::StringPiece(StringValuePtr(text));
1375
+ c->regexp = self;
1376
+ c->text = text;
1377
+
1378
+ if (p->pattern->ok()) {
1379
+ c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
1380
+ } else {
1381
+ c->number_of_capturing_groups = 0;
1382
+ }
1383
+
1384
+ c->eof = false;
1385
+
1386
+ return scanner;
1387
+ }
1388
+
1389
+ /*
1390
+ * Returns a copy of +str+ with the first occurrence +pattern+
1391
+ * replaced with +rewrite+.
1392
+ *
1393
+ * @param [String] str the string to modify
1394
+ * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1395
+ * @param [String] rewrite the string to replace with
1396
+ * @return [String] the resulting string
1397
+ * @example
1398
+ * RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
1399
+ * re2 = RE2::Regexp.new("hel+o")
1400
+ * RE2.Replace("hello there", re2, "yo") #=> "yo there"
1401
+ */
1402
+ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
1403
+ VALUE rewrite) {
1404
+ UNUSED(self);
1405
+ re2_pattern *p;
1406
+
1407
+ /* Convert all the inputs to be pumped into RE2::Replace. */
1408
+ string str_as_string(StringValuePtr(str));
1409
+
1410
+ /* Do the replacement. */
1411
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1412
+ Data_Get_Struct(pattern, re2_pattern, p);
1413
+ RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1414
+
1415
+ return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1416
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1417
+ } else {
1418
+ RE2::Replace(&str_as_string, StringValuePtr(pattern),
1419
+ StringValuePtr(rewrite));
1420
+
1421
+ return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(),
1422
+ pattern);
1423
+ }
1424
+
1425
+ }
1426
+
1427
+ /*
1428
+ * Return a copy of +str+ with +pattern+ replaced by +rewrite+.
1429
+ *
1430
+ * @param [String] str the string to modify
1431
+ * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1432
+ * @param [String] rewrite the string to replace with
1433
+ * @return [String] the resulting string
1434
+ * @example
1435
+ * re2 = RE2::Regexp.new("oo?")
1436
+ * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
1437
+ * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
1438
+ */
1439
+ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
1440
+ VALUE rewrite) {
1441
+ UNUSED(self);
1442
+
1443
+ /* Convert all the inputs to be pumped into RE2::GlobalReplace. */
1444
+ re2_pattern *p;
1445
+ string str_as_string(StringValuePtr(str));
1446
+
1447
+ /* Do the replacement. */
1448
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1449
+ Data_Get_Struct(pattern, re2_pattern, p);
1450
+ RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1451
+
1452
+ return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1453
+ p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1454
+ } else {
1455
+ RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
1456
+ StringValuePtr(rewrite));
1457
+
1458
+ return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(),
1459
+ pattern);
1460
+ }
1461
+ }
1462
+
1463
+ /*
1464
+ * Returns a version of str with all potentially meaningful regexp
1465
+ * characters escaped. The returned string, used as a regular
1466
+ * expression, will exactly match the original string.
1467
+ *
1468
+ * @param [String] unquoted the unquoted string
1469
+ * @return [String] the escaped string
1470
+ * @example
1471
+ * RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
1472
+ */
1473
+ static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
1474
+ UNUSED(self);
1475
+ string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
1476
+ return rb_str_new(quoted_string.data(), quoted_string.size());
1477
+ }
1478
+
1479
+ void re2_set_free(re2_set *self) {
1480
+ if (self->set) {
1481
+ delete self->set;
1482
+ }
1483
+ free(self);
1484
+ }
1485
+
1486
+ static VALUE re2_set_allocate(VALUE klass) {
1487
+ re2_set *s;
1488
+ VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
1489
+ return result;
1490
+ }
1491
+
1492
+ /*
1493
+ * Returns a new {RE2::Set} object, a collection of patterns that can be
1494
+ * searched for simultaneously.
1495
+ *
1496
+ * @return [RE2::Set]
1497
+ *
1498
+ * @overload initialize
1499
+ * Returns a new {RE2::Set} object for unanchored patterns with the default
1500
+ * options.
1501
+ *
1502
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1503
+ * @return [RE2::Set]
1504
+ *
1505
+ * @overload initialize(anchor)
1506
+ * Returns a new {RE2::Set} object for the specified anchor with the default
1507
+ * options.
1508
+ *
1509
+ * @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
1510
+ * @raise [ArgumentError] if anchor is not :unanchored, :anchor_start or :anchor_both
1511
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1512
+ *
1513
+ * @overload initialize(anchor, options)
1514
+ * Returns a new {RE2::Set} object with the specified options.
1515
+ *
1516
+ * @param [Symbol] anchor One of :unanchored, :anchor_start, :anchor_both
1517
+ * @param [Hash] options the options with which to compile the pattern
1518
+ * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
1519
+ * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
1520
+ * @option options [Boolean] :longest_match (false) search for longest match, not first match
1521
+ * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
1522
+ * @option options [Integer] :max_mem approx. max memory footprint of RE2
1523
+ * @option options [Boolean] :literal (false) interpret string as literal, not regexp
1524
+ * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
1525
+ * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
1526
+ * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
1527
+ * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
1528
+ * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
1529
+ * @return [RE2::Set] an RE2::Set with the specified anchor and options
1530
+ * @raise [ArgumentError] if anchor is not one of the accepted choices
1531
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
1532
+ */
1533
+ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1534
+ VALUE anchor, options;
1535
+ re2_set *s;
1536
+ RE2::Anchor re2_anchor;
1537
+ RE2::Options re2_options;
1538
+
1539
+ rb_scan_args(argc, argv, "02", &anchor, &options);
1540
+ Data_Get_Struct(self, re2_set, s);
1541
+
1542
+ if (RTEST(options)) {
1543
+ parse_re2_options(re2_options, options);
1544
+ }
1545
+ if (NIL_P(anchor)) {
1546
+ re2_anchor = RE2::UNANCHORED;
1547
+ } else {
1548
+ Check_Type(anchor, T_SYMBOL);
1549
+ ID id_anchor = SYM2ID(anchor);
1550
+ if (id_anchor == id_unanchored) {
1551
+ re2_anchor = RE2::UNANCHORED;
1552
+ } else if (id_anchor == id_anchor_start) {
1553
+ re2_anchor = RE2::ANCHOR_START;
1554
+ } else if (id_anchor == id_anchor_both) {
1555
+ re2_anchor = RE2::ANCHOR_BOTH;
1556
+ } else {
1557
+ rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
1558
+ }
1559
+ }
1560
+
1561
+ s->set = new(nothrow) RE2::Set(re2_options, re2_anchor);
1562
+ if (s->set == 0) {
1563
+ rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
1564
+ }
1565
+
1566
+ return self;
1567
+ }
1568
+
1569
+ /*
1570
+ * Adds a pattern to the set. Returns the index that will identify the pattern
1571
+ * in the output of #match. Cannot be called after #compile has been called.
1572
+ *
1573
+ * @param [String] pattern the regex pattern
1574
+ * @return [Integer] the index of the pattern in the set
1575
+ * @raise [ArgumentError] if called after compile or the pattern is rejected
1576
+ * @example
1577
+ * set = RE2::Set.new
1578
+ * set.add("abc") #=> 0
1579
+ * set.add("def") #=> 1
1580
+ */
1581
+ static VALUE re2_set_add(VALUE self, VALUE pattern) {
1582
+ Check_Type(pattern, T_STRING);
1583
+ re2::StringPiece regex(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
1584
+ std::string err;
1585
+ re2_set *s;
1586
+ Data_Get_Struct(self, re2_set, s);
1587
+ int index = s->set->Add(regex, &err);
1588
+ if (index < 0) {
1589
+ rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", err.c_str());
1590
+ }
1591
+
1592
+ return INT2FIX(index);
1593
+ }
1594
+
1595
+ /*
1596
+ * Compiles a Set so it can be used to match against. Must be called after #add
1597
+ * and before #match.
1598
+ *
1599
+ * @return [Bool] whether compilation was a success
1600
+ * @example
1601
+ * set = RE2::Set.new
1602
+ * set.add("abc")
1603
+ * set.compile # => true
1604
+ */
1605
+ static VALUE re2_set_compile(VALUE self) {
1606
+ re2_set *s;
1607
+ Data_Get_Struct(self, re2_set, s);
1608
+
1609
+ return BOOL2RUBY(s->set->Compile());
1610
+ }
1611
+
1612
+ /*
1613
+ * Returns whether the underlying re2 version outputs error information from
1614
+ * RE2::Set::Match. If not, #match will raise an error if attempting to set its
1615
+ * :exception option to true.
1616
+ *
1617
+ * @return [Bool] whether the underlying re2 outputs error information from Set matches
1618
+ */
1619
+ static VALUE re2_set_match_raises_errors_p(VALUE self) {
1620
+ UNUSED(self);
1621
+ #ifdef HAVE_ERROR_INFO_ARGUMENT
1622
+ return Qtrue;
1623
+ #else
1624
+ return Qfalse;
1625
+ #endif
1626
+ }
1627
+
1628
+ /*
1629
+ * Matches the given text against patterns in the set, returning an array of
1630
+ * integer indices of the matching patterns if matched or an empty array if
1631
+ * there are no matches.
1632
+ *
1633
+ * @return [Array<Integer>]
1634
+ *
1635
+ * @overload match(str)
1636
+ * Returns an array of integer indices of patterns matching the given string
1637
+ * (if any). Raises exceptions if there are any errors while matching.
1638
+ *
1639
+ * @param [String] str the text to match against
1640
+ * @return [Array<Integer>] the indices of matching regexps
1641
+ * @raise [MatchError] if an error occurs while matching
1642
+ * @raise [UnsupportedError] if the underlying version of re2 does not output error information
1643
+ * @example
1644
+ * set = RE2::Set.new
1645
+ * set.add("abc")
1646
+ * set.add("def")
1647
+ * set.compile
1648
+ * set.match("abcdef") # => [0, 1]
1649
+ *
1650
+ * @overload match(str, options)
1651
+ * Returns an array of integer indices of patterns matching the given string
1652
+ * (if any). Raises exceptions if there are any errors while matching and the
1653
+ * :exception option is set to true.
1654
+ *
1655
+ * @param [String] str the text to match against
1656
+ * @param [Hash] options the options with which to match
1657
+ * @option options [Boolean] :exception (true) whether to raise exceptions with re2's error information (not supported on ABI version 0 of re2)
1658
+ * @return [Array<Integer>] the indices of matching regexps
1659
+ * @raise [MatchError] if an error occurs while matching
1660
+ * @raise [UnsupportedError] if the underlying version of re2 does not output error information
1661
+ * @example
1662
+ * set = RE2::Set.new
1663
+ * set.add("abc")
1664
+ * set.add("def")
1665
+ * set.compile
1666
+ * set.match("abcdef", :exception => true) # => [0, 1]
1667
+ */
1668
+ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1669
+ VALUE str, options, exception_option;
1670
+ bool raise_exception = true;
1671
+ rb_scan_args(argc, argv, "11", &str, &options);
1672
+ Check_Type(str, T_STRING);
1673
+ re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
1674
+ std::vector<int> v;
1675
+ re2_set *s;
1676
+ Data_Get_Struct(self, re2_set, s);
1677
+
1678
+ if (RTEST(options)) {
1679
+ Check_Type(options, T_HASH);
1680
+
1681
+ exception_option = rb_hash_aref(options, ID2SYM(id_exception));
1682
+ if (!NIL_P(exception_option)) {
1683
+ raise_exception = RTEST(exception_option);
1684
+ }
1685
+ }
1686
+
1687
+ if (raise_exception) {
1688
+ #ifdef HAVE_ERROR_INFO_ARGUMENT
1689
+ RE2::Set::ErrorInfo e;
1690
+ bool match_failed = !s->set->Match(data, &v, &e);
1691
+ VALUE result = rb_ary_new2(v.size());
1692
+
1693
+ if (match_failed) {
1694
+ switch (e.kind) {
1695
+ case RE2::Set::kNoError:
1696
+ break;
1697
+ case RE2::Set::kNotCompiled:
1698
+ rb_raise(re2_eSetMatchError, "#match must not be called before #compile");
1699
+ case RE2::Set::kOutOfMemory:
1700
+ rb_raise(re2_eSetMatchError, "The DFA ran out of memory");
1701
+ case RE2::Set::kInconsistent:
1702
+ rb_raise(re2_eSetMatchError, "RE2::Prog internal error");
1703
+ default: // Just in case a future version of libre2 adds new ErrorKinds
1704
+ rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
1705
+ }
1706
+ } else {
1707
+ for (size_t i = 0; i < v.size(); i++) {
1708
+ rb_ary_push(result, INT2FIX(v[i]));
1709
+ }
1710
+ }
1711
+
1712
+ return result;
1713
+ #else
1714
+ rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
1715
+ #endif
1716
+ } else {
1717
+ bool matched = s->set->Match(data, &v);
1718
+ VALUE result = rb_ary_new2(v.size());
1719
+
1720
+ if (matched) {
1721
+ for (size_t i = 0; i < v.size(); i++) {
1722
+ rb_ary_push(result, INT2FIX(v[i]));
1723
+ }
1724
+ }
1725
+
1726
+ return result;
1727
+ }
1728
+ }
1729
+
1730
+ /* Forward declare Init_re2 to be called by C code but define it separately so
1731
+ * that YARD can parse it.
1732
+ */
1733
+ extern "C" void Init_re2(void);
1734
+
1735
+ void Init_re2(void) {
1736
+ re2_mRE2 = rb_define_module("RE2");
1737
+ re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
1738
+ re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
1739
+ re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
1740
+ re2_cSet = rb_define_class_under(re2_mRE2, "Set", rb_cObject);
1741
+ re2_eSetMatchError = rb_define_class_under(re2_cSet, "MatchError",
1742
+ rb_const_get(rb_cObject, rb_intern("StandardError")));
1743
+ re2_eSetUnsupportedError = rb_define_class_under(re2_cSet, "UnsupportedError",
1744
+ rb_const_get(rb_cObject, rb_intern("StandardError")));
1745
+
1746
+ rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
1747
+ rb_define_alloc_func(re2_cMatchData,
1748
+ (VALUE (*)(VALUE))re2_matchdata_allocate);
1749
+ rb_define_alloc_func(re2_cScanner,
1750
+ (VALUE (*)(VALUE))re2_scanner_allocate);
1751
+ rb_define_alloc_func(re2_cSet, (VALUE (*)(VALUE))re2_set_allocate);
1752
+
1753
+ rb_define_method(re2_cMatchData, "string",
1754
+ RUBY_METHOD_FUNC(re2_matchdata_string), 0);
1755
+ rb_define_method(re2_cMatchData, "regexp",
1756
+ RUBY_METHOD_FUNC(re2_matchdata_regexp), 0);
1757
+ rb_define_method(re2_cMatchData, "to_a",
1758
+ RUBY_METHOD_FUNC(re2_matchdata_to_a), 0);
1759
+ rb_define_method(re2_cMatchData, "size",
1760
+ RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1761
+ rb_define_method(re2_cMatchData, "length",
1762
+ RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1763
+ rb_define_method(re2_cMatchData, "begin",
1764
+ RUBY_METHOD_FUNC(re2_matchdata_begin), 1);
1765
+ rb_define_method(re2_cMatchData, "end",
1766
+ RUBY_METHOD_FUNC(re2_matchdata_end), 1);
1767
+ rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
1768
+ -1);
1769
+ rb_define_method(re2_cMatchData, "to_s",
1770
+ RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
1771
+ rb_define_method(re2_cMatchData, "inspect",
1772
+ RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
1773
+ rb_define_method(re2_cMatchData, "deconstruct",
1774
+ RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
1775
+ rb_define_method(re2_cMatchData, "deconstruct_keys",
1776
+ RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);
1777
+
1778
+ rb_define_method(re2_cScanner, "string",
1779
+ RUBY_METHOD_FUNC(re2_scanner_string), 0);
1780
+ rb_define_method(re2_cScanner, "eof?",
1781
+ RUBY_METHOD_FUNC(re2_scanner_eof), 0);
1782
+ rb_define_method(re2_cScanner, "regexp",
1783
+ RUBY_METHOD_FUNC(re2_scanner_regexp), 0);
1784
+ rb_define_method(re2_cScanner, "scan",
1785
+ RUBY_METHOD_FUNC(re2_scanner_scan), 0);
1786
+ rb_define_method(re2_cScanner, "rewind",
1787
+ RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
1788
+
1789
+ rb_define_method(re2_cRegexp, "initialize",
1790
+ RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
1791
+ rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
1792
+ rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
1793
+ 0);
1794
+ rb_define_method(re2_cRegexp, "error_arg",
1795
+ RUBY_METHOD_FUNC(re2_regexp_error_arg), 0);
1796
+ rb_define_method(re2_cRegexp, "program_size",
1797
+ RUBY_METHOD_FUNC(re2_regexp_program_size), 0);
1798
+ rb_define_method(re2_cRegexp, "options",
1799
+ RUBY_METHOD_FUNC(re2_regexp_options), 0);
1800
+ rb_define_method(re2_cRegexp, "number_of_capturing_groups",
1801
+ RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
1802
+ rb_define_method(re2_cRegexp, "named_capturing_groups",
1803
+ RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
1804
+ rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
1805
+ -1);
1806
+ rb_define_method(re2_cRegexp, "match?",
1807
+ RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1808
+ rb_define_method(re2_cRegexp, "=~",
1809
+ RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1810
+ rb_define_method(re2_cRegexp, "===",
1811
+ RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
1812
+ rb_define_method(re2_cRegexp, "scan",
1813
+ RUBY_METHOD_FUNC(re2_regexp_scan), 1);
1814
+ rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
1815
+ rb_define_method(re2_cRegexp, "to_str", RUBY_METHOD_FUNC(re2_regexp_to_s),
1816
+ 0);
1817
+ rb_define_method(re2_cRegexp, "pattern", RUBY_METHOD_FUNC(re2_regexp_to_s),
1818
+ 0);
1819
+ rb_define_method(re2_cRegexp, "source", RUBY_METHOD_FUNC(re2_regexp_to_s),
1820
+ 0);
1821
+ rb_define_method(re2_cRegexp, "inspect",
1822
+ RUBY_METHOD_FUNC(re2_regexp_inspect), 0);
1823
+ rb_define_method(re2_cRegexp, "utf8?", RUBY_METHOD_FUNC(re2_regexp_utf8),
1824
+ 0);
1825
+ rb_define_method(re2_cRegexp, "posix_syntax?",
1826
+ RUBY_METHOD_FUNC(re2_regexp_posix_syntax), 0);
1827
+ rb_define_method(re2_cRegexp, "longest_match?",
1828
+ RUBY_METHOD_FUNC(re2_regexp_longest_match), 0);
1829
+ rb_define_method(re2_cRegexp, "log_errors?",
1830
+ RUBY_METHOD_FUNC(re2_regexp_log_errors), 0);
1831
+ rb_define_method(re2_cRegexp, "max_mem",
1832
+ RUBY_METHOD_FUNC(re2_regexp_max_mem), 0);
1833
+ rb_define_method(re2_cRegexp, "literal?",
1834
+ RUBY_METHOD_FUNC(re2_regexp_literal), 0);
1835
+ rb_define_method(re2_cRegexp, "never_nl?",
1836
+ RUBY_METHOD_FUNC(re2_regexp_never_nl), 0);
1837
+ rb_define_method(re2_cRegexp, "case_sensitive?",
1838
+ RUBY_METHOD_FUNC(re2_regexp_case_sensitive), 0);
1839
+ rb_define_method(re2_cRegexp, "case_insensitive?",
1840
+ RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
1841
+ rb_define_method(re2_cRegexp, "casefold?",
1842
+ RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
1843
+ rb_define_method(re2_cRegexp, "perl_classes?",
1844
+ RUBY_METHOD_FUNC(re2_regexp_perl_classes), 0);
1845
+ rb_define_method(re2_cRegexp, "word_boundary?",
1846
+ RUBY_METHOD_FUNC(re2_regexp_word_boundary), 0);
1847
+ rb_define_method(re2_cRegexp, "one_line?",
1848
+ RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
1849
+
1850
+ rb_define_singleton_method(re2_cSet, "match_raises_errors?",
1851
+ RUBY_METHOD_FUNC(re2_set_match_raises_errors_p), 0);
1852
+ rb_define_method(re2_cSet, "initialize",
1853
+ RUBY_METHOD_FUNC(re2_set_initialize), -1);
1854
+ rb_define_method(re2_cSet, "add", RUBY_METHOD_FUNC(re2_set_add), 1);
1855
+ rb_define_method(re2_cSet, "compile", RUBY_METHOD_FUNC(re2_set_compile), 0);
1856
+ rb_define_method(re2_cSet, "match", RUBY_METHOD_FUNC(re2_set_match), -1);
1857
+
1858
+ rb_define_module_function(re2_mRE2, "Replace",
1859
+ RUBY_METHOD_FUNC(re2_Replace), 3);
1860
+ rb_define_module_function(re2_mRE2, "GlobalReplace",
1861
+ RUBY_METHOD_FUNC(re2_GlobalReplace), 3);
1862
+ rb_define_module_function(re2_mRE2, "QuoteMeta",
1863
+ RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1864
+ rb_define_singleton_method(re2_cRegexp, "escape",
1865
+ RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1866
+ rb_define_singleton_method(re2_cRegexp, "quote",
1867
+ RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1868
+ rb_define_singleton_method(re2_cRegexp, "compile",
1869
+ RUBY_METHOD_FUNC(rb_class_new_instance), -1);
1870
+
1871
+ rb_define_global_function("RE2", RUBY_METHOD_FUNC(re2_re2), -1);
1872
+
1873
+ /* Intern the IDs of the symbols used in options. */
1874
+ id_utf8 = rb_intern("utf8");
1875
+ id_posix_syntax = rb_intern("posix_syntax");
1876
+ id_longest_match = rb_intern("longest_match");
1877
+ id_log_errors = rb_intern("log_errors");
1878
+ id_max_mem = rb_intern("max_mem");
1879
+ id_literal = rb_intern("literal");
1880
+ id_never_nl = rb_intern("never_nl");
1881
+ id_case_sensitive = rb_intern("case_sensitive");
1882
+ id_perl_classes = rb_intern("perl_classes");
1883
+ id_word_boundary = rb_intern("word_boundary");
1884
+ id_one_line = rb_intern("one_line");
1885
+ id_unanchored = rb_intern("unanchored");
1886
+ id_anchor_start = rb_intern("anchor_start");
1887
+ id_anchor_both = rb_intern("anchor_both");
1888
+ id_exception = rb_intern("exception");
1889
+ }