re2 0.6.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (6) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +5 -2
  3. data/Rakefile +4 -0
  4. data/ext/re2/re2.cc +1168 -1161
  5. data/spec/re2/regexp_spec.rb +4 -0
  6. metadata +12 -11
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dbde479e41f910dff23edbc656cbc6ab092c322c
4
- data.tar.gz: c69628eea5d97fae581078584353b3125b06134d
3
+ metadata.gz: 44eacddc1b64782c61eff633b15f40db1ba3b9d9
4
+ data.tar.gz: 377ae25429bb8804c03a951d0150097d11cb808e
5
5
  SHA512:
6
- metadata.gz: fbceef56880f497c8d09da21123a12056ba5bc4c351bc7e49f88b657b323fc7e9d775bb1e48836222941954ffd356f6709348eb6d9b3f7bdfa0899b77e1d0e57
7
- data.tar.gz: f5a1724a484d227cb9499611c285ceeb60fbf27e99b6cfe7a67c71eb3d7272745740eb79e25a5ba828cb25ec667cc51778429a26eca95b6569d94f814c0a48b3
6
+ metadata.gz: 1f09c136bb17bfabfe739882cc61c7f0bf50dfba140aec66db0a6e2962de0d25e844804812137b5c3e798a61bb2b8002d83fc4d7766bc7fc52ce159535634b4e
7
+ data.tar.gz: f9742540b13859de929a299ac28a646d305e4a72bcffdd0588dad268f53cb0135f440a1f914cd7504a3b01eb5bf9c569c54c83cfaa4ba0b7f131326ef01a0212
data/README.md CHANGED
@@ -1,9 +1,12 @@
1
- re2 [![Build Status](https://secure.travis-ci.org/mudge/re2.png?branch=master)](http://travis-ci.org/mudge/re2)
1
+ re2 [![Build Status](https://travis-ci.org/mudge/re2.svg?branch=master)](http://travis-ci.org/mudge/re2)
2
2
  ===
3
3
 
4
4
  A Ruby binding to [re2][], an "efficient, principled regular expression
5
5
  library".
6
6
 
7
+ **Current version:** 0.6.1
8
+ **Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0.0, 2.1.0, Rubinius 2.2
9
+
7
10
  Installation
8
11
  ------------
9
12
 
@@ -158,7 +161,7 @@ All feedback should go to the mailing list: <mailto:ruby.re2@librelist.com>
158
161
  [ruby-dev]: http://packages.debian.org/ruby-dev
159
162
  [build-essential]: http://packages.debian.org/build-essential
160
163
  [Regexp]: http://ruby-doc.org/core/classes/Regexp.html
161
- [MatchData]: http://ruby-doc.org/core/classes/MatchData.html
164
+ [MatchData]: http://ruby-doc.org/core/classes/MatchData.html
162
165
  [Homebrew]: http://mxcl.github.com/homebrew
163
166
  [libre2-dev]: http://packages.debian.org/search?keywords=libre2-dev
164
167
  [official syntax page]: http://code.google.com/p/re2/wiki/Syntax
data/Rakefile CHANGED
@@ -9,6 +9,10 @@ Rake::TestTask.new do |t|
9
9
  t.verbose = true
10
10
  end
11
11
 
12
+ task :valgrind do
13
+ system "valgrind --tool=memcheck --leak-check=full --show-reachable=no --num-callers=15 --track-fds=yes --workaround-gcc296-bugs=yes --max-stackframe=7304328 --dsymutil=yes --track-origins=yes --log-file=report.txt ruby spec/leak.rb"
14
+ end
15
+
12
16
  task :test => :compile
13
17
  task :spec => :test
14
18
  task :default => :test
@@ -17,1304 +17,1311 @@ using std::nothrow;
17
17
  using std::map;
18
18
  using std::vector;
19
19
 
20
- extern "C" {
21
- #ifdef HAVE_RUBY_ENCODING_H
22
- #include <ruby/encoding.h>
23
- #define ENCODED_STR_NEW(str, length, encoding) \
24
- ({ \
25
- VALUE _string = rb_str_new(str, length); \
26
- int _enc = rb_enc_find_index(encoding); \
27
- rb_enc_associate_index(_string, _enc); \
28
- _string; \
29
- })
30
- #define ENCODED_STR_NEW2(str, length, str2) \
31
- ({ \
32
- VALUE _string = rb_str_new(str, length); \
33
- int _enc = rb_enc_get_index(str2); \
34
- rb_enc_associate_index(_string, _enc); \
35
- _string; \
36
- })
37
- #else
38
- #define ENCODED_STR_NEW(str, length, encoding) \
39
- rb_str_new((const char *)str, (long)length)
40
- #define ENCODED_STR_NEW2(str, length, str2) \
41
- rb_str_new((const char *)str, (long)length)
42
- #endif
20
+ #ifdef HAVE_RUBY_ENCODING_H
21
+ #include <ruby/encoding.h>
22
+ #define ENCODED_STR_NEW(str, length, encoding) \
23
+ ({ \
24
+ VALUE _string = rb_str_new(str, length); \
25
+ int _enc = rb_enc_find_index(encoding); \
26
+ rb_enc_associate_index(_string, _enc); \
27
+ _string; \
28
+ })
29
+ #define ENCODED_STR_NEW2(str, length, str2) \
30
+ ({ \
31
+ VALUE _string = rb_str_new(str, length); \
32
+ int _enc = rb_enc_get_index(str2); \
33
+ rb_enc_associate_index(_string, _enc); \
34
+ _string; \
35
+ })
36
+ #else
37
+ #define ENCODED_STR_NEW(str, length, encoding) \
38
+ rb_str_new((const char *)str, (long)length)
39
+ #define ENCODED_STR_NEW2(str, length, str2) \
40
+ rb_str_new((const char *)str, (long)length)
41
+ #endif
42
+
43
+ #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
44
+ #define UNUSED(x) ((void)x)
45
+
46
+ #ifndef RSTRING_LEN
47
+ #define RSTRING_LEN(x) (RSTRING(x)->len)
48
+ #endif
49
+
50
+ #ifndef RSTRING_PTR
51
+ #define RSTRING_PTR(x) (RSTRING(x)->ptr)
52
+ #endif
53
+
54
+ #ifdef HAVE_ENDPOS_ARGUMENT
55
+ #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
56
+ (pattern->Match(text, startpos, endpos, anchor, match, nmatch))
57
+ #else
58
+ #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
59
+ (pattern->Match(text, startpos, anchor, match, nmatch))
60
+ #endif
61
+
62
+ typedef struct {
63
+ RE2 *pattern;
64
+ } re2_pattern;
65
+
66
+ typedef struct {
67
+ re2::StringPiece *matches;
68
+ int number_of_matches;
69
+ VALUE regexp, text;
70
+ } re2_matchdata;
71
+
72
+ typedef struct {
73
+ re2::StringPiece *input;
74
+ int number_of_capturing_groups;
75
+ VALUE regexp, text;
76
+ } re2_scanner;
77
+
78
+ VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner;
79
+
80
+ /* Symbols used in RE2 options. */
81
+ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
82
+ id_max_mem, id_literal, id_never_nl, id_case_sensitive,
83
+ id_perl_classes, id_word_boundary, id_one_line;
84
+
85
+ void re2_matchdata_mark(re2_matchdata* self) {
86
+ rb_gc_mark(self->regexp);
87
+ rb_gc_mark(self->text);
88
+ }
43
89
 
44
- #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
45
- #define UNUSED(x) ((void)x)
90
+ void re2_matchdata_free(re2_matchdata* self) {
91
+ if (self->matches) {
92
+ delete[] self->matches;
93
+ }
94
+ free(self);
95
+ }
46
96
 
47
- #ifndef RSTRING_LEN
48
- #define RSTRING_LEN(x) (RSTRING(x)->len)
49
- #endif
97
+ void re2_scanner_mark(re2_scanner* self) {
98
+ rb_gc_mark(self->regexp);
99
+ rb_gc_mark(self->text);
100
+ }
50
101
 
51
- #ifndef RSTRING_PTR
52
- #define RSTRING_PTR(x) (RSTRING(x)->ptr)
53
- #endif
102
+ void re2_scanner_free(re2_scanner* self) {
103
+ if (self->input) {
104
+ delete self->input;
105
+ }
106
+ free(self);
107
+ }
54
108
 
55
- #ifdef HAVE_ENDPOS_ARGUMENT
56
- #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
57
- (pattern->Match(text, startpos, endpos, anchor, match, nmatch))
58
- #else
59
- #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
60
- (pattern->Match(text, startpos, anchor, match, nmatch))
61
- #endif
109
+ void re2_regexp_free(re2_pattern* self) {
110
+ if (self->pattern) {
111
+ delete self->pattern;
112
+ }
113
+ free(self);
114
+ }
62
115
 
63
- typedef struct {
64
- RE2 *pattern;
65
- } re2_pattern;
116
+ static VALUE re2_matchdata_allocate(VALUE klass) {
117
+ re2_matchdata *m;
118
+ return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
119
+ re2_matchdata_free, m);
120
+ }
66
121
 
67
- typedef struct {
68
- re2::StringPiece *matches;
69
- int number_of_matches;
70
- VALUE regexp, text;
71
- } re2_matchdata;
122
+ static VALUE re2_scanner_allocate(VALUE klass) {
123
+ re2_scanner *c;
124
+ return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
125
+ re2_scanner_free, c);
126
+ }
127
+
128
+ /*
129
+ * Returns a frozen copy of the string passed into +match+.
130
+ *
131
+ * @return [String] a frozen copy of the passed string.
132
+ * @example
133
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
134
+ * m.string #=> "bob 123"
135
+ */
136
+ static VALUE re2_matchdata_string(VALUE self) {
137
+ re2_matchdata *m;
138
+ Data_Get_Struct(self, re2_matchdata, m);
72
139
 
73
- typedef struct {
74
- re2::StringPiece *input;
75
- int number_of_capturing_groups;
76
- VALUE regexp, text;
77
- } re2_scanner;
140
+ return m->text;
141
+ }
78
142
 
79
- VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner;
143
+ /*
144
+ * Returns the string passed into the scanner.
145
+ *
146
+ * @return [String] the original string.
147
+ * @example
148
+ * c = RE2::Regexp.new('(\d+)').scan("foo")
149
+ * c.string #=> "foo"
150
+ */
151
+ static VALUE re2_scanner_string(VALUE self) {
152
+ re2_scanner *c;
153
+ Data_Get_Struct(self, re2_scanner, c);
80
154
 
81
- /* Symbols used in RE2 options. */
82
- static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
83
- id_max_mem, id_literal, id_never_nl, id_case_sensitive,
84
- id_perl_classes, id_word_boundary, id_one_line;
155
+ return c->text;
156
+ }
85
157
 
86
- void re2_matchdata_mark(re2_matchdata* self) {
87
- rb_gc_mark(self->regexp);
88
- rb_gc_mark(self->text);
89
- }
158
+ /*
159
+ * Rewind the scanner to the start of the string.
160
+ *
161
+ * @example
162
+ * s = RE2::Regexp.new('(\d+)').scan("1 2 3")
163
+ * e = s.to_enum
164
+ * e.scan #=> ["1"]
165
+ * e.scan #=> ["2"]
166
+ * s.rewind
167
+ * e.scan #=> ["1"]
168
+ */
169
+ static VALUE re2_scanner_rewind(VALUE self) {
170
+ re2_scanner *c;
171
+ Data_Get_Struct(self, re2_scanner, c);
90
172
 
91
- void re2_matchdata_free(re2_matchdata* self) {
92
- if (self->matches) {
93
- delete[] self->matches;
94
- }
95
- free(self);
96
- }
173
+ c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text));
97
174
 
98
- void re2_scanner_mark(re2_scanner* self) {
99
- rb_gc_mark(self->regexp);
100
- rb_gc_mark(self->text);
101
- }
175
+ return self;
176
+ }
102
177
 
103
- void re2_scanner_free(re2_scanner* self) {
104
- if (self->input) {
105
- delete self->input;
106
- }
107
- free(self);
178
+ /*
179
+ * Scan the given text incrementally for matches, returning an array of
180
+ * matches on each subsequent call. Returns nil if no matches are found.
181
+ *
182
+ * @return [Array<String>] the matches.
183
+ * @example
184
+ * s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
185
+ * s.scan #=> ["Foo"]
186
+ * s.scan #=> ["bar"]
187
+ */
188
+ static VALUE re2_scanner_scan(VALUE self) {
189
+ int i;
190
+ re2_pattern *p;
191
+ re2_scanner *c;
192
+ VALUE result;
193
+
194
+ Data_Get_Struct(self, re2_scanner, c);
195
+ Data_Get_Struct(c->regexp, re2_pattern, p);
196
+
197
+ vector<RE2::Arg> argv(c->number_of_capturing_groups);
198
+ vector<RE2::Arg*> args(c->number_of_capturing_groups);
199
+ vector<string> matches(c->number_of_capturing_groups);
200
+
201
+ for (i = 0; i < c->number_of_capturing_groups; i++) {
202
+ matches[i] = "";
203
+ argv[i] = &matches[i];
204
+ args[i] = &argv[i];
108
205
  }
109
206
 
110
- void re2_regexp_free(re2_pattern* self) {
111
- if (self->pattern) {
112
- delete self->pattern;
207
+ if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0],
208
+ c->number_of_capturing_groups)) {
209
+ result = rb_ary_new2(c->number_of_capturing_groups);
210
+ for (i = 0; i < c->number_of_capturing_groups; i++) {
211
+ if (matches[i].empty()) {
212
+ rb_ary_push(result, Qnil);
213
+ } else {
214
+ rb_ary_push(result, ENCODED_STR_NEW(matches[i].data(),
215
+ matches[i].size(),
216
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"));
217
+ }
113
218
  }
114
- free(self);
219
+ } else {
220
+ result = Qnil;
115
221
  }
116
222
 
117
- static VALUE re2_matchdata_allocate(VALUE klass) {
118
- re2_matchdata *m;
119
- return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
120
- re2_matchdata_free, m);
121
- }
223
+ return result;
224
+ }
122
225
 
123
- static VALUE re2_scanner_allocate(VALUE klass) {
124
- re2_scanner *c;
125
- return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
126
- re2_scanner_free, c);
127
- }
226
+ /*
227
+ * Returns the number of elements in the match array (including nils).
228
+ *
229
+ * @return [Fixnum] the number of elements
230
+ * @example
231
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
232
+ * m.size #=> 2
233
+ * m.length #=> 2
234
+ */
235
+ static VALUE re2_matchdata_size(VALUE self) {
236
+ re2_matchdata *m;
237
+ Data_Get_Struct(self, re2_matchdata, m);
128
238
 
129
- /*
130
- * Returns a frozen copy of the string passed into +match+.
131
- *
132
- * @return [String] a frozen copy of the passed string.
133
- * @example
134
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
135
- * m.string #=> "bob 123"
136
- */
137
- static VALUE re2_matchdata_string(VALUE self) {
138
- re2_matchdata *m;
139
- Data_Get_Struct(self, re2_matchdata, m);
140
-
141
- return m->text;
142
- }
239
+ return INT2FIX(m->number_of_matches);
240
+ }
143
241
 
144
- /*
145
- * Returns the string passed into the scanner.
146
- *
147
- * @return [String] the original string.
148
- * @example
149
- * c = RE2::Regexp.new('(\d+)').scan("foo")
150
- * c.string #=> "foo"
151
- */
152
- static VALUE re2_scanner_string(VALUE self) {
153
- re2_scanner *c;
154
- Data_Get_Struct(self, re2_scanner, c);
155
-
156
- return c->text;
157
- }
242
+ /*
243
+ * Returns the {RE2::Regexp} used in the match.
244
+ *
245
+ * @return [RE2::Regexp] the regexp used in the match
246
+ * @example
247
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
248
+ * m.regexp #=> #<RE2::Regexp /(\d+)/>
249
+ */
250
+ static VALUE re2_matchdata_regexp(VALUE self) {
251
+ re2_matchdata *m;
252
+ Data_Get_Struct(self, re2_matchdata, m);
253
+ return m->regexp;
254
+ }
158
255
 
159
- /*
160
- * Rewind the scanner to the start of the string.
161
- *
162
- * @example
163
- * s = RE2::Regexp.new('(\d+)').scan("1 2 3")
164
- * e = s.to_enum
165
- * e.scan #=> ["1"]
166
- * e.scan #=> ["2"]
167
- * s.rewind
168
- * e.scan #=> ["1"]
169
- */
170
- static VALUE re2_scanner_rewind(VALUE self) {
171
- re2_scanner *c;
172
- Data_Get_Struct(self, re2_scanner, c);
173
-
174
- c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text));
175
-
176
- return self;
177
- }
256
+ /*
257
+ * Returns the {RE2::Regexp} used in the scanner.
258
+ *
259
+ * @return [RE2::Regexp] the regexp used in the scanner
260
+ * @example
261
+ * c = RE2::Regexp.new('(\d+)').scan("bob 123")
262
+ * c.regexp #=> #<RE2::Regexp /(\d+)/>
263
+ */
264
+ static VALUE re2_scanner_regexp(VALUE self) {
265
+ re2_scanner *c;
266
+ Data_Get_Struct(self, re2_scanner, c);
178
267
 
179
- /*
180
- * Scan the given text incrementally for matches, returning an array of
181
- * matches on each subsequent call. Returns nil if no matches are found.
182
- *
183
- * @return [Array<String>] the matches.
184
- * @example
185
- * s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
186
- * s.scan #=> ["Foo"]
187
- * s.scan #=> ["bar"]
188
- */
189
- static VALUE re2_scanner_scan(VALUE self) {
190
- int i;
191
- re2_pattern *p;
192
- re2_scanner *c;
193
- VALUE result;
194
-
195
- Data_Get_Struct(self, re2_scanner, c);
196
- Data_Get_Struct(c->regexp, re2_pattern, p);
197
-
198
- vector<RE2::Arg> argv(c->number_of_capturing_groups);
199
- vector<RE2::Arg*> args(c->number_of_capturing_groups);
200
- vector<string> matches(c->number_of_capturing_groups);
268
+ return c->regexp;
269
+ }
201
270
 
202
- for (i = 0; i < c->number_of_capturing_groups; i++) {
203
- matches[i] = "";
204
- argv[i] = &matches[i];
205
- args[i] = &argv[i];
206
- }
271
+ static VALUE re2_regexp_allocate(VALUE klass) {
272
+ re2_pattern *p;
273
+ return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
274
+ }
207
275
 
208
- if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0],
209
- c->number_of_capturing_groups)) {
210
- result = rb_ary_new2(c->number_of_capturing_groups);
211
- for (i = 0; i < c->number_of_capturing_groups; i++) {
212
- if (matches[i].empty()) {
213
- rb_ary_push(result, Qnil);
214
- } else {
215
- rb_ary_push(result, ENCODED_STR_NEW(matches[i].data(),
216
- matches[i].size(),
217
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"));
218
- }
219
- }
276
+ /*
277
+ * Returns the array of matches.
278
+ *
279
+ * @return [Array<String, nil>] the array of matches
280
+ * @example
281
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
282
+ * m.to_a #=> ["123", "123"]
283
+ */
284
+ static VALUE re2_matchdata_to_a(VALUE self) {
285
+ int i;
286
+ re2_matchdata *m;
287
+ re2_pattern *p;
288
+ re2::StringPiece *match;
289
+ VALUE array;
290
+
291
+ Data_Get_Struct(self, re2_matchdata, m);
292
+ Data_Get_Struct(m->regexp, re2_pattern, p);
293
+
294
+ array = rb_ary_new2(m->number_of_matches);
295
+ for (i = 0; i < m->number_of_matches; i++) {
296
+ match = &m->matches[i];
297
+
298
+ if (match->empty()) {
299
+ rb_ary_push(array, Qnil);
220
300
  } else {
221
- result = Qnil;
301
+ rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
302
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"));
222
303
  }
223
-
224
- return result;
225
304
  }
226
305
 
227
- /*
228
- * Returns the number of elements in the match array (including nils).
229
- *
230
- * @return [Fixnum] the number of elements
231
- * @example
232
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
233
- * m.size #=> 2
234
- * m.length #=> 2
235
- */
236
- static VALUE re2_matchdata_size(VALUE self) {
237
- re2_matchdata *m;
238
- Data_Get_Struct(self, re2_matchdata, m);
239
-
240
- return INT2FIX(m->number_of_matches);
241
- }
306
+ return array;
307
+ }
242
308
 
243
- /*
244
- * Returns the {RE2::Regexp} used in the match.
245
- *
246
- * @return [RE2::Regexp] the regexp used in the match
247
- * @example
248
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
249
- * m.regexp #=> #<RE2::Regexp /(\d+)/>
250
- */
251
- static VALUE re2_matchdata_regexp(VALUE self) {
252
- re2_matchdata *m;
253
- Data_Get_Struct(self, re2_matchdata, m);
254
- return m->regexp;
255
- }
309
+ static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
310
+ re2_matchdata *m;
311
+ re2_pattern *p;
312
+ re2::StringPiece *match;
256
313
 
257
- /*
258
- * Returns the {RE2::Regexp} used in the scanner.
259
- *
260
- * @return [RE2::Regexp] the regexp used in the scanner
261
- * @example
262
- * c = RE2::Regexp.new('(\d+)').scan("bob 123")
263
- * c.regexp #=> #<RE2::Regexp /(\d+)/>
264
- */
265
- static VALUE re2_scanner_regexp(VALUE self) {
266
- re2_scanner *c;
267
- Data_Get_Struct(self, re2_scanner, c);
268
-
269
- return c->regexp;
270
- }
314
+ Data_Get_Struct(self, re2_matchdata, m);
315
+ Data_Get_Struct(m->regexp, re2_pattern, p);
271
316
 
272
- static VALUE re2_regexp_allocate(VALUE klass) {
273
- re2_pattern *p;
274
- return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
275
- }
317
+ if (nth < 0 || nth >= m->number_of_matches) {
318
+ return Qnil;
319
+ } else {
320
+ match = &m->matches[nth];
276
321
 
277
- /*
278
- * Returns the array of matches.
279
- *
280
- * @return [Array<String, nil>] the array of matches
281
- * @example
282
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
283
- * m.to_a #=> ["123", "123"]
284
- */
285
- static VALUE re2_matchdata_to_a(VALUE self) {
286
- int i;
287
- re2_matchdata *m;
288
- re2_pattern *p;
289
- re2::StringPiece match;
290
- VALUE array;
291
-
292
- Data_Get_Struct(self, re2_matchdata, m);
293
- Data_Get_Struct(m->regexp, re2_pattern, p);
294
-
295
- array = rb_ary_new2(m->number_of_matches);
296
- for (i = 0; i < m->number_of_matches; i++) {
297
- if (m->matches[i].empty()) {
298
- rb_ary_push(array, Qnil);
299
- } else {
300
- match = m->matches[i];
301
- rb_ary_push(array, ENCODED_STR_NEW(match.data(), match.size(),
302
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"));
303
- }
322
+ if (match->empty()) {
323
+ return Qnil;
324
+ } else {
325
+ return ENCODED_STR_NEW(match->data(), match->size(),
326
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
304
327
  }
305
-
306
- return array;
307
328
  }
329
+ }
308
330
 
309
- static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
310
- re2_matchdata *m;
311
- re2_pattern *p;
312
- re2::StringPiece match;
331
+ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
332
+ int idx;
333
+ re2_matchdata *m;
334
+ re2_pattern *p;
335
+ map<string, int> groups;
336
+ string name_as_string(name);
313
337
 
314
- Data_Get_Struct(self, re2_matchdata, m);
315
- Data_Get_Struct(m->regexp, re2_pattern, p);
338
+ Data_Get_Struct(self, re2_matchdata, m);
339
+ Data_Get_Struct(m->regexp, re2_pattern, p);
316
340
 
317
- if (nth < 0 || nth >= m->number_of_matches) {
318
- return Qnil;
319
- } else {
320
- match = m->matches[nth];
341
+ groups = p->pattern->NamedCapturingGroups();
321
342
 
322
- if (match.empty()) {
323
- return Qnil;
324
- } else {
325
- return ENCODED_STR_NEW(match.data(), match.size(),
326
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
327
- }
328
- }
343
+ if (groups.count(name_as_string) == 1) {
344
+ idx = groups[name_as_string];
345
+ return re2_matchdata_nth_match(idx, self);
346
+ } else {
347
+ return Qnil;
329
348
  }
349
+ }
330
350
 
331
- static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
332
- int idx;
333
- re2_matchdata *m;
334
- re2_pattern *p;
335
- map<string, int> groups;
336
- string name_as_string(name);
351
+ /*
352
+ * Retrieve zero, one or more matches by index or name.
353
+ *
354
+ * @return [Array<String, nil>, String, Boolean]
355
+ *
356
+ * @overload [](index)
357
+ * Access a particular match by index.
358
+ *
359
+ * @param [Fixnum] index the index of the match to fetch
360
+ * @return [String, nil] the specified match
361
+ * @example
362
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
363
+ * m[0] #=> "123"
364
+ *
365
+ * @overload [](start, length)
366
+ * Access a range of matches by starting index and length.
367
+ *
368
+ * @param [Fixnum] start the index from which to start
369
+ * @param [Fixnum] length the number of elements to fetch
370
+ * @return [Array<String, nil>] the specified matches
371
+ * @example
372
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
373
+ * m[0, 1] #=> ["123"]
374
+ *
375
+ * @overload [](range)
376
+ * Access a range of matches by index.
377
+ *
378
+ * @param [Range] range the range of match indexes to fetch
379
+ * @return [Array<String, nil>] the specified matches
380
+ * @example
381
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
382
+ * m[0..1] #=> ["123", "123"]
383
+ *
384
+ * @overload [](name)
385
+ * Access a particular match by name.
386
+ *
387
+ * @param [String, Symbol] name the name of the match to fetch
388
+ * @return [String, nil] the specific match
389
+ * @example
390
+ * m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
391
+ * m["number"] #=> "123"
392
+ * m[:number] #=> "123"
393
+ */
394
+ static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
395
+ VALUE idx, rest;
396
+ rb_scan_args(argc, argv, "11", &idx, &rest);
397
+
398
+ if (TYPE(idx) == T_STRING) {
399
+ return re2_matchdata_named_match(StringValuePtr(idx), self);
400
+ } else if (SYMBOL_P(idx)) {
401
+ return re2_matchdata_named_match(rb_id2name(SYM2ID(idx)), self);
402
+ } else if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) {
403
+ return rb_ary_aref(argc, argv, re2_matchdata_to_a(self));
404
+ } else {
405
+ return re2_matchdata_nth_match(FIX2INT(idx), self);
406
+ }
407
+ }
337
408
 
338
- Data_Get_Struct(self, re2_matchdata, m);
339
- Data_Get_Struct(m->regexp, re2_pattern, p);
409
+ /*
410
+ * Returns the entire matched string.
411
+ *
412
+ * @return [String] the entire matched string
413
+ */
414
+ static VALUE re2_matchdata_to_s(VALUE self) {
415
+ return re2_matchdata_nth_match(0, self);
416
+ }
340
417
 
341
- groups = p->pattern->NamedCapturingGroups();
418
+ /*
419
+ * Returns a printable version of the match.
420
+ *
421
+ * @return [String] a printable version of the match
422
+ * @example
423
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
424
+ * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
425
+ */
426
+ static VALUE re2_matchdata_inspect(VALUE self) {
427
+ int i;
428
+ re2_matchdata *m;
429
+ re2_pattern *p;
430
+ VALUE match, result;
431
+ ostringstream output;
342
432
 
343
- if (groups.count(name_as_string) == 1) {
344
- idx = groups[name_as_string];
345
- return re2_matchdata_nth_match(idx, self);
346
- } else {
347
- return Qnil;
433
+ Data_Get_Struct(self, re2_matchdata, m);
434
+ Data_Get_Struct(m->regexp, re2_pattern, p);
435
+
436
+ output << "#<RE2::MatchData";
437
+
438
+ for (i = 0; i < m->number_of_matches; i++) {
439
+ output << " ";
440
+
441
+ if (i > 0) {
442
+ output << i << ":";
348
443
  }
349
- }
350
444
 
351
- /*
352
- * Retrieve zero, one or more matches by index or name.
353
- *
354
- * @return [Array<String, nil>, String, Boolean]
355
- *
356
- * @overload [](index)
357
- * Access a particular match by index.
358
- *
359
- * @param [Fixnum] index the index of the match to fetch
360
- * @return [String, nil] the specified match
361
- * @example
362
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
363
- * m[0] #=> "123"
364
- *
365
- * @overload [](start, length)
366
- * Access a range of matches by starting index and length.
367
- *
368
- * @param [Fixnum] start the index from which to start
369
- * @param [Fixnum] length the number of elements to fetch
370
- * @return [Array<String, nil>] the specified matches
371
- * @example
372
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
373
- * m[0, 1] #=> ["123"]
374
- *
375
- * @overload [](range)
376
- * Access a range of matches by index.
377
- *
378
- * @param [Range] range the range of match indexes to fetch
379
- * @return [Array<String, nil>] the specified matches
380
- * @example
381
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
382
- * m[0..1] #=> ["123", "123"]
383
- *
384
- * @overload [](name)
385
- * Access a particular match by name.
386
- *
387
- * @param [String, Symbol] name the name of the match to fetch
388
- * @return [String, nil] the specific match
389
- * @example
390
- * m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
391
- * m["number"] #=> "123"
392
- * m[:number] #=> "123"
393
- */
394
- static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
395
- VALUE idx, rest;
396
- rb_scan_args(argc, argv, "11", &idx, &rest);
397
-
398
- if (TYPE(idx) == T_STRING) {
399
- return re2_matchdata_named_match(StringValuePtr(idx), self);
400
- } else if (TYPE(idx) == T_SYMBOL) {
401
- return re2_matchdata_named_match(rb_id2name(SYM2ID(idx)), self);
402
- } else if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) {
403
- return rb_ary_aref(argc, argv, re2_matchdata_to_a(self));
445
+ match = re2_matchdata_nth_match(i, self);
446
+
447
+ if (match == Qnil) {
448
+ output << "nil";
404
449
  } else {
405
- return re2_matchdata_nth_match(FIX2INT(idx), self);
450
+ output << "\"" << StringValuePtr(match) << "\"";
406
451
  }
407
452
  }
408
453
 
409
- /*
410
- * Returns the entire matched string.
411
- *
412
- * @return [String] the entire matched string
413
- */
414
- static VALUE re2_matchdata_to_s(VALUE self) {
415
- return re2_matchdata_nth_match(0, self);
416
- }
454
+ output << ">";
417
455
 
418
- /*
419
- * Returns a printable version of the match.
420
- *
421
- * @return [String] a printable version of the match
422
- * @example
423
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
424
- * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
425
- */
426
- static VALUE re2_matchdata_inspect(VALUE self) {
427
- int i;
428
- re2_matchdata *m;
429
- re2_pattern *p;
430
- VALUE match, result;
431
- ostringstream output;
432
-
433
- Data_Get_Struct(self, re2_matchdata, m);
434
- Data_Get_Struct(m->regexp, re2_pattern, p);
435
-
436
- output << "#<RE2::MatchData";
437
-
438
- for (i = 0; i < m->number_of_matches; i++) {
439
- output << " ";
440
-
441
- if (i > 0) {
442
- output << i << ":";
443
- }
456
+ result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
457
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
444
458
 
445
- match = re2_matchdata_nth_match(i, self);
459
+ return result;
460
+ }
446
461
 
447
- if (match == Qnil) {
448
- output << "nil";
449
- } else {
450
- output << "\"" << StringValuePtr(match) << "\"";
451
- }
462
+ /*
463
+ * Returns a new RE2 object with a compiled version of
464
+ * +pattern+ stored inside. Equivalent to +RE2::Regexp.new+.
465
+ *
466
+ * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
467
+ * @param [String] pattern the pattern to compile
468
+ * @param [Hash] options the options to compile a regexp with
469
+ * @see RE2::Regexp.new
470
+ *
471
+ */
472
+ static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
473
+ UNUSED(self);
474
+ return rb_class_new_instance(argc, argv, re2_cRegexp);
475
+ }
476
+
477
+ /*
478
+ * Returns a new {RE2::Regexp} object with a compiled version of
479
+ * +pattern+ stored inside.
480
+ *
481
+ * @return [RE2::Regexp]
482
+ *
483
+ * @overload initialize(pattern)
484
+ * Returns a new {RE2::Regexp} object with a compiled version of
485
+ * +pattern+ stored inside with the default options.
486
+ *
487
+ * @param [String] pattern the pattern to compile
488
+ * @return [RE2::Regexp] an RE2::Regexp with the specified pattern
489
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled
490
+ * pattern
491
+ *
492
+ * @overload initialize(pattern, options)
493
+ * Returns a new {RE2::Regexp} object with a compiled version of
494
+ * +pattern+ stored inside with the specified options.
495
+ *
496
+ * @param [String] pattern the pattern to compile
497
+ * @param [Hash] options the options with which to compile the pattern
498
+ * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
499
+ * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
500
+ * @option options [Boolean] :longest_match (false) search for longest match, not first match
501
+ * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
502
+ * @option options [Fixnum] :max_mem approx. max memory footprint of RE2
503
+ * @option options [Boolean] :literal (false) interpret string as literal, not regexp
504
+ * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
505
+ * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
506
+ * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
507
+ * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
508
+ * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
509
+ * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
510
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
511
+ */
512
+ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
513
+ VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors,
514
+ max_mem, literal, never_nl, case_sensitive, perl_classes,
515
+ word_boundary, one_line;
516
+ re2_pattern *p;
517
+
518
+ rb_scan_args(argc, argv, "11", &pattern, &options);
519
+ Data_Get_Struct(self, re2_pattern, p);
520
+
521
+ if (RTEST(options)) {
522
+ if (TYPE(options) != T_HASH) {
523
+ rb_raise(rb_eArgError, "options should be a hash");
452
524
  }
453
525
 
454
- output << ">";
526
+ RE2::Options re2_options;
455
527
 
456
- result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
457
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
528
+ utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
529
+ if (!NIL_P(utf8)) {
530
+ re2_options.set_utf8(RTEST(utf8));
531
+ }
458
532
 
459
- return result;
460
- }
533
+ posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
534
+ if (!NIL_P(posix_syntax)) {
535
+ re2_options.set_posix_syntax(RTEST(posix_syntax));
536
+ }
461
537
 
462
- /*
463
- * Returns a new RE2 object with a compiled version of
464
- * +pattern+ stored inside. Equivalent to +RE2.new+.
465
- *
466
- * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
467
- * @param [String] pattern the pattern to compile
468
- * @param [Hash] options the options to compile a regexp with
469
- * @see RE2::Regexp.new
470
- *
471
- */
472
- static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
473
- UNUSED(self);
474
- return rb_class_new_instance(argc, argv, re2_cRegexp);
475
- }
538
+ longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
539
+ if (!NIL_P(longest_match)) {
540
+ re2_options.set_longest_match(RTEST(longest_match));
541
+ }
476
542
 
477
- /*
478
- * Returns a new {RE2::Regexp} object with a compiled version of
479
- * +pattern+ stored inside.
480
- *
481
- * @return [RE2::Regexp]
482
- *
483
- * @overload initialize(pattern)
484
- * Returns a new {RE2::Regexp} object with a compiled version of
485
- * +pattern+ stored inside with the default options.
486
- *
487
- * @param [String] pattern the pattern to compile
488
- * @return [RE2::Regexp] an RE2::Regexp with the specified pattern
489
- * @raise [NoMemoryError] if memory could not be allocated for the compiled
490
- * pattern
491
- *
492
- * @overload initialize(pattern, options)
493
- * Returns a new {RE2::Regexp} object with a compiled version of
494
- * +pattern+ stored inside with the specified options.
495
- *
496
- * @param [String] pattern the pattern to compile
497
- * @param [Hash] options the options with which to compile the pattern
498
- * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
499
- * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
500
- * @option options [Boolean] :longest_match (false) search for longest match, not first match
501
- * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
502
- * @option options [Fixnum] :max_mem approx. max memory footprint of RE2
503
- * @option options [Boolean] :literal (false) interpret string as literal, not regexp
504
- * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
505
- * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
506
- * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
507
- * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
508
- * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
509
- * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
510
- * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
511
- */
512
- static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
513
- VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors,
514
- max_mem, literal, never_nl, case_sensitive, perl_classes,
515
- word_boundary, one_line;
516
- re2_pattern *p;
517
-
518
- rb_scan_args(argc, argv, "11", &pattern, &options);
519
- Data_Get_Struct(self, re2_pattern, p);
520
-
521
- if (RTEST(options)) {
522
- if (TYPE(options) != T_HASH) {
523
- rb_raise(rb_eArgError, "options should be a hash");
524
- }
543
+ log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
544
+ if (!NIL_P(log_errors)) {
545
+ re2_options.set_log_errors(RTEST(log_errors));
546
+ }
525
547
 
526
- RE2::Options re2_options;
548
+ max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
549
+ if (!NIL_P(max_mem)) {
550
+ re2_options.set_max_mem(NUM2INT(max_mem));
551
+ }
527
552
 
528
- utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
529
- if (!NIL_P(utf8)) {
530
- re2_options.set_utf8(RTEST(utf8));
531
- }
553
+ literal = rb_hash_aref(options, ID2SYM(id_literal));
554
+ if (!NIL_P(literal)) {
555
+ re2_options.set_literal(RTEST(literal));
556
+ }
532
557
 
533
- posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
534
- if (!NIL_P(posix_syntax)) {
535
- re2_options.set_posix_syntax(RTEST(posix_syntax));
536
- }
558
+ never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
559
+ if (!NIL_P(never_nl)) {
560
+ re2_options.set_never_nl(RTEST(never_nl));
561
+ }
537
562
 
538
- longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
539
- if (!NIL_P(longest_match)) {
540
- re2_options.set_longest_match(RTEST(longest_match));
541
- }
563
+ case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
564
+ if (!NIL_P(case_sensitive)) {
565
+ re2_options.set_case_sensitive(RTEST(case_sensitive));
566
+ }
542
567
 
543
- log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
544
- if (!NIL_P(log_errors)) {
545
- re2_options.set_log_errors(RTEST(log_errors));
546
- }
568
+ perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
569
+ if (!NIL_P(perl_classes)) {
570
+ re2_options.set_perl_classes(RTEST(perl_classes));
571
+ }
547
572
 
548
- max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
549
- if (!NIL_P(max_mem)) {
550
- re2_options.set_max_mem(NUM2INT(max_mem));
551
- }
573
+ word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
574
+ if (!NIL_P(word_boundary)) {
575
+ re2_options.set_word_boundary(RTEST(word_boundary));
576
+ }
552
577
 
553
- literal = rb_hash_aref(options, ID2SYM(id_literal));
554
- if (!NIL_P(literal)) {
555
- re2_options.set_literal(RTEST(literal));
556
- }
578
+ one_line = rb_hash_aref(options, ID2SYM(id_one_line));
579
+ if (!NIL_P(one_line)) {
580
+ re2_options.set_one_line(RTEST(one_line));
581
+ }
557
582
 
558
- never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
559
- if (!NIL_P(never_nl)) {
560
- re2_options.set_never_nl(RTEST(never_nl));
561
- }
583
+ p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
584
+ } else {
585
+ p->pattern = new(nothrow) RE2(StringValuePtr(pattern));
586
+ }
562
587
 
563
- case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
564
- if (!NIL_P(case_sensitive)) {
565
- re2_options.set_case_sensitive(RTEST(case_sensitive));
566
- }
588
+ if (p->pattern == 0) {
589
+ rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
590
+ }
567
591
 
568
- perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
569
- if (!NIL_P(perl_classes)) {
570
- re2_options.set_perl_classes(RTEST(perl_classes));
571
- }
592
+ return self;
593
+ }
572
594
 
573
- word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
574
- if (!NIL_P(word_boundary)) {
575
- re2_options.set_word_boundary(RTEST(word_boundary));
576
- }
595
+ /*
596
+ * Returns a printable version of the regular expression +re2+.
597
+ *
598
+ * @return [String] a printable version of the regular expression
599
+ * @example
600
+ * re2 = RE2::Regexp.new("woo?")
601
+ * re2.inspect #=> "#<RE2::Regexp /woo?/>"
602
+ */
603
+ static VALUE re2_regexp_inspect(VALUE self) {
604
+ re2_pattern *p;
605
+ VALUE result;
606
+ ostringstream output;
577
607
 
578
- one_line = rb_hash_aref(options, ID2SYM(id_one_line));
579
- if (!NIL_P(one_line)) {
580
- re2_options.set_one_line(RTEST(one_line));
581
- }
608
+ Data_Get_Struct(self, re2_pattern, p);
582
609
 
583
- p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
584
- } else {
585
- p->pattern = new(nothrow) RE2(StringValuePtr(pattern));
586
- }
610
+ output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
587
611
 
588
- if (p->pattern == 0) {
589
- rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
590
- }
612
+ result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
613
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
591
614
 
592
- return self;
593
- }
615
+ return result;
616
+ }
617
+
618
+ /*
619
+ * Returns a string version of the regular expression +re2+.
620
+ *
621
+ * @return [String] a string version of the regular expression
622
+ * @example
623
+ * re2 = RE2::Regexp.new("woo?")
624
+ * re2.to_s #=> "woo?"
625
+ */
626
+ static VALUE re2_regexp_to_s(VALUE self) {
627
+ re2_pattern *p;
628
+ Data_Get_Struct(self, re2_pattern, p);
629
+ return ENCODED_STR_NEW(p->pattern->pattern().data(),
630
+ p->pattern->pattern().size(),
631
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
632
+ }
594
633
 
595
- /*
596
- * Returns a printable version of the regular expression +re2+.
597
- *
598
- * @return [String] a printable version of the regular expression
599
- * @example
600
- * re2 = RE2::Regexp.new("woo?")
601
- * re2.inspect #=> "#<RE2::Regexp /woo?/>"
602
- */
603
- static VALUE re2_regexp_inspect(VALUE self) {
604
- re2_pattern *p;
605
- VALUE result;
606
- ostringstream output;
634
+ /*
635
+ * Returns whether or not the regular expression +re2+
636
+ * was compiled successfully or not.
637
+ *
638
+ * @return [Boolean] whether or not compilation was successful
639
+ * @example
640
+ * re2 = RE2::Regexp.new("woo?")
641
+ * re2.ok? #=> true
642
+ */
643
+ static VALUE re2_regexp_ok(VALUE self) {
644
+ re2_pattern *p;
645
+ Data_Get_Struct(self, re2_pattern, p);
646
+ return BOOL2RUBY(p->pattern->ok());
647
+ }
607
648
 
608
- Data_Get_Struct(self, re2_pattern, p);
649
+ /*
650
+ * Returns whether or not the regular expression +re2+
651
+ * was compiled with the utf8 option set to true.
652
+ *
653
+ * @return [Boolean] the utf8 option
654
+ * @example
655
+ * re2 = RE2::Regexp.new("woo?", :utf8 => true)
656
+ * re2.utf8? #=> true
657
+ */
658
+ static VALUE re2_regexp_utf8(VALUE self) {
659
+ re2_pattern *p;
660
+ Data_Get_Struct(self, re2_pattern, p);
661
+ return BOOL2RUBY(p->pattern->options().utf8());
662
+ }
609
663
 
610
- output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
664
+ /*
665
+ * Returns whether or not the regular expression +re2+
666
+ * was compiled with the posix_syntax option set to true.
667
+ *
668
+ * @return [Boolean] the posix_syntax option
669
+ * @example
670
+ * re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
671
+ * re2.posix_syntax? #=> true
672
+ */
673
+ static VALUE re2_regexp_posix_syntax(VALUE self) {
674
+ re2_pattern *p;
675
+ Data_Get_Struct(self, re2_pattern, p);
676
+ return BOOL2RUBY(p->pattern->options().posix_syntax());
677
+ }
611
678
 
612
- result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
613
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
679
+ /*
680
+ * Returns whether or not the regular expression +re2+
681
+ * was compiled with the longest_match option set to true.
682
+ *
683
+ * @return [Boolean] the longest_match option
684
+ * @example
685
+ * re2 = RE2::Regexp.new("woo?", :longest_match => true)
686
+ * re2.longest_match? #=> true
687
+ */
688
+ static VALUE re2_regexp_longest_match(VALUE self) {
689
+ re2_pattern *p;
690
+ Data_Get_Struct(self, re2_pattern, p);
691
+ return BOOL2RUBY(p->pattern->options().longest_match());
692
+ }
614
693
 
615
- return result;
616
- }
694
+ /*
695
+ * Returns whether or not the regular expression +re2+
696
+ * was compiled with the log_errors option set to true.
697
+ *
698
+ * @return [Boolean] the log_errors option
699
+ * @example
700
+ * re2 = RE2::Regexp.new("woo?", :log_errors => true)
701
+ * re2.log_errors? #=> true
702
+ */
703
+ static VALUE re2_regexp_log_errors(VALUE self) {
704
+ re2_pattern *p;
705
+ Data_Get_Struct(self, re2_pattern, p);
706
+ return BOOL2RUBY(p->pattern->options().log_errors());
707
+ }
617
708
 
618
- /*
619
- * Returns a string version of the regular expression +re2+.
620
- *
621
- * @return [String] a string version of the regular expression
622
- * @example
623
- * re2 = RE2::Regexp.new("woo?")
624
- * re2.to_s #=> "woo?"
625
- */
626
- static VALUE re2_regexp_to_s(VALUE self) {
627
- re2_pattern *p;
628
- Data_Get_Struct(self, re2_pattern, p);
629
- return ENCODED_STR_NEW(p->pattern->pattern().data(),
630
- p->pattern->pattern().size(),
631
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
632
- }
709
+ /*
710
+ * Returns the max_mem setting for the regular expression
711
+ * +re2+.
712
+ *
713
+ * @return [Fixnum] the max_mem option
714
+ * @example
715
+ * re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
716
+ * re2.max_mem #=> 1024
717
+ */
718
+ static VALUE re2_regexp_max_mem(VALUE self) {
719
+ re2_pattern *p;
720
+ Data_Get_Struct(self, re2_pattern, p);
721
+ return INT2FIX(p->pattern->options().max_mem());
722
+ }
633
723
 
634
- /*
635
- * Returns whether or not the regular expression +re2+
636
- * was compiled successfully or not.
637
- *
638
- * @return [Boolean] whether or not compilation was successful
639
- * @example
640
- * re2 = RE2::Regexp.new("woo?")
641
- * re2.ok? #=> true
642
- */
643
- static VALUE re2_regexp_ok(VALUE self) {
644
- re2_pattern *p;
645
- Data_Get_Struct(self, re2_pattern, p);
646
- return BOOL2RUBY(p->pattern->ok());
647
- }
724
+ /*
725
+ * Returns whether or not the regular expression +re2+
726
+ * was compiled with the literal option set to true.
727
+ *
728
+ * @return [Boolean] the literal option
729
+ * @example
730
+ * re2 = RE2::Regexp.new("woo?", :literal => true)
731
+ * re2.literal? #=> true
732
+ */
733
+ static VALUE re2_regexp_literal(VALUE self) {
734
+ re2_pattern *p;
735
+ Data_Get_Struct(self, re2_pattern, p);
736
+ return BOOL2RUBY(p->pattern->options().literal());
737
+ }
648
738
 
649
- /*
650
- * Returns whether or not the regular expression +re2+
651
- * was compiled with the utf8 option set to true.
652
- *
653
- * @return [Boolean] the utf8 option
654
- * @example
655
- * re2 = RE2::Regexp.new("woo?", :utf8 => true)
656
- * re2.utf8? #=> true
657
- */
658
- static VALUE re2_regexp_utf8(VALUE self) {
659
- re2_pattern *p;
660
- Data_Get_Struct(self, re2_pattern, p);
661
- return BOOL2RUBY(p->pattern->options().utf8());
662
- }
739
+ /*
740
+ * Returns whether or not the regular expression +re2+
741
+ * was compiled with the never_nl option set to true.
742
+ *
743
+ * @return [Boolean] the never_nl option
744
+ * @example
745
+ * re2 = RE2::Regexp.new("woo?", :never_nl => true)
746
+ * re2.never_nl? #=> true
747
+ */
748
+ static VALUE re2_regexp_never_nl(VALUE self) {
749
+ re2_pattern *p;
750
+ Data_Get_Struct(self, re2_pattern, p);
751
+ return BOOL2RUBY(p->pattern->options().never_nl());
752
+ }
663
753
 
664
- /*
665
- * Returns whether or not the regular expression +re2+
666
- * was compiled with the posix_syntax option set to true.
667
- *
668
- * @return [Boolean] the posix_syntax option
669
- * @example
670
- * re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
671
- * re2.posix_syntax? #=> true
672
- */
673
- static VALUE re2_regexp_posix_syntax(VALUE self) {
674
- re2_pattern *p;
675
- Data_Get_Struct(self, re2_pattern, p);
676
- return BOOL2RUBY(p->pattern->options().posix_syntax());
677
- }
754
+ /*
755
+ * Returns whether or not the regular expression +re2+
756
+ * was compiled with the case_sensitive option set to true.
757
+ *
758
+ * @return [Boolean] the case_sensitive option
759
+ * @example
760
+ * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
761
+ * re2.case_sensitive? #=> true
762
+ */
763
+ static VALUE re2_regexp_case_sensitive(VALUE self) {
764
+ re2_pattern *p;
765
+ Data_Get_Struct(self, re2_pattern, p);
766
+ return BOOL2RUBY(p->pattern->options().case_sensitive());
767
+ }
678
768
 
679
- /*
680
- * Returns whether or not the regular expression +re2+
681
- * was compiled with the longest_match option set to true.
682
- *
683
- * @return [Boolean] the longest_match option
684
- * @example
685
- * re2 = RE2::Regexp.new("woo?", :longest_match => true)
686
- * re2.longest_match? #=> true
687
- */
688
- static VALUE re2_regexp_longest_match(VALUE self) {
689
- re2_pattern *p;
690
- Data_Get_Struct(self, re2_pattern, p);
691
- return BOOL2RUBY(p->pattern->options().longest_match());
692
- }
769
+ /*
770
+ * Returns whether or not the regular expression +re2+
771
+ * was compiled with the case_sensitive option set to false.
772
+ *
773
+ * @return [Boolean] the inverse of the case_sensitive option
774
+ * @example
775
+ * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
776
+ * re2.case_insensitive? #=> false
777
+ * re2.casefold? #=> false
778
+ */
779
+ static VALUE re2_regexp_case_insensitive(VALUE self) {
780
+ return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
781
+ }
693
782
 
694
- /*
695
- * Returns whether or not the regular expression +re2+
696
- * was compiled with the log_errors option set to true.
697
- *
698
- * @return [Boolean] the log_errors option
699
- * @example
700
- * re2 = RE2::Regexp.new("woo?", :log_errors => true)
701
- * re2.log_errors? #=> true
702
- */
703
- static VALUE re2_regexp_log_errors(VALUE self) {
704
- re2_pattern *p;
705
- Data_Get_Struct(self, re2_pattern, p);
706
- return BOOL2RUBY(p->pattern->options().log_errors());
707
- }
783
+ /*
784
+ * Returns whether or not the regular expression +re2+
785
+ * was compiled with the perl_classes option set to true.
786
+ *
787
+ * @return [Boolean] the perl_classes option
788
+ * @example
789
+ * re2 = RE2::Regexp.new("woo?", :perl_classes => true)
790
+ * re2.perl_classes? #=> true
791
+ */
792
+ static VALUE re2_regexp_perl_classes(VALUE self) {
793
+ re2_pattern *p;
794
+ Data_Get_Struct(self, re2_pattern, p);
795
+ return BOOL2RUBY(p->pattern->options().perl_classes());
796
+ }
708
797
 
709
- /*
710
- * Returns the max_mem setting for the regular expression
711
- * +re2+.
712
- *
713
- * @return [Fixnum] the max_mem option
714
- * @example
715
- * re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
716
- * re2.max_mem #=> 1024
717
- */
718
- static VALUE re2_regexp_max_mem(VALUE self) {
719
- re2_pattern *p;
720
- Data_Get_Struct(self, re2_pattern, p);
721
- return INT2FIX(p->pattern->options().max_mem());
722
- }
798
+ /*
799
+ * Returns whether or not the regular expression +re2+
800
+ * was compiled with the word_boundary option set to true.
801
+ *
802
+ * @return [Boolean] the word_boundary option
803
+ * @example
804
+ * re2 = RE2::Regexp.new("woo?", :word_boundary => true)
805
+ * re2.word_boundary? #=> true
806
+ */
807
+ static VALUE re2_regexp_word_boundary(VALUE self) {
808
+ re2_pattern *p;
809
+ Data_Get_Struct(self, re2_pattern, p);
810
+ return BOOL2RUBY(p->pattern->options().word_boundary());
811
+ }
723
812
 
724
- /*
725
- * Returns whether or not the regular expression +re2+
726
- * was compiled with the literal option set to true.
727
- *
728
- * @return [Boolean] the literal option
729
- * @example
730
- * re2 = RE2::Regexp.new("woo?", :literal => true)
731
- * re2.literal? #=> true
732
- */
733
- static VALUE re2_regexp_literal(VALUE self) {
734
- re2_pattern *p;
735
- Data_Get_Struct(self, re2_pattern, p);
736
- return BOOL2RUBY(p->pattern->options().literal());
737
- }
813
+ /*
814
+ * Returns whether or not the regular expression +re2+
815
+ * was compiled with the one_line option set to true.
816
+ *
817
+ * @return [Boolean] the one_line option
818
+ * @example
819
+ * re2 = RE2::Regexp.new("woo?", :one_line => true)
820
+ * re2.one_line? #=> true
821
+ */
822
+ static VALUE re2_regexp_one_line(VALUE self) {
823
+ re2_pattern *p;
824
+ Data_Get_Struct(self, re2_pattern, p);
825
+ return BOOL2RUBY(p->pattern->options().one_line());
826
+ }
738
827
 
739
- /*
740
- * Returns whether or not the regular expression +re2+
741
- * was compiled with the never_nl option set to true.
742
- *
743
- * @return [Boolean] the never_nl option
744
- * @example
745
- * re2 = RE2::Regexp.new("woo?", :never_nl => true)
746
- * re2.never_nl? #=> true
747
- */
748
- static VALUE re2_regexp_never_nl(VALUE self) {
749
- re2_pattern *p;
750
- Data_Get_Struct(self, re2_pattern, p);
751
- return BOOL2RUBY(p->pattern->options().never_nl());
828
+ /*
829
+ * If the RE2 could not be created properly, returns an
830
+ * error string otherwise returns nil.
831
+ *
832
+ * @return [String, nil] the error string or nil
833
+ */
834
+ static VALUE re2_regexp_error(VALUE self) {
835
+ re2_pattern *p;
836
+ Data_Get_Struct(self, re2_pattern, p);
837
+ if (p->pattern->ok()) {
838
+ return Qnil;
839
+ } else {
840
+ return rb_str_new(p->pattern->error().data(), p->pattern->error().size());
752
841
  }
842
+ }
753
843
 
754
- /*
755
- * Returns whether or not the regular expression +re2+
756
- * was compiled with the case_sensitive option set to true.
757
- *
758
- * @return [Boolean] the case_sensitive option
759
- * @example
760
- * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
761
- * re2.case_sensitive? #=> true
762
- */
763
- static VALUE re2_regexp_case_sensitive(VALUE self) {
764
- re2_pattern *p;
765
- Data_Get_Struct(self, re2_pattern, p);
766
- return BOOL2RUBY(p->pattern->options().case_sensitive());
844
+ /*
845
+ * If the RE2 could not be created properly, returns
846
+ * the offending portion of the regexp otherwise returns nil.
847
+ *
848
+ * @return [String, nil] the offending portion of the regexp or nil
849
+ */
850
+ static VALUE re2_regexp_error_arg(VALUE self) {
851
+ re2_pattern *p;
852
+ Data_Get_Struct(self, re2_pattern, p);
853
+ if (p->pattern->ok()) {
854
+ return Qnil;
855
+ } else {
856
+ return ENCODED_STR_NEW(p->pattern->error_arg().data(),
857
+ p->pattern->error_arg().size(),
858
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
767
859
  }
860
+ }
768
861
 
769
- /*
770
- * Returns whether or not the regular expression +re2+
771
- * was compiled with the case_sensitive option set to false.
772
- *
773
- * @return [Boolean] the inverse of the case_sensitive option
774
- * @example
775
- * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
776
- * re2.case_insensitive? #=> false
777
- * re2.casefold? #=> false
778
- */
779
- static VALUE re2_regexp_case_insensitive(VALUE self) {
780
- return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
781
- }
862
+ /*
863
+ * Returns the program size, a very approximate measure
864
+ * of a regexp's "cost". Larger numbers are more expensive
865
+ * than smaller numbers.
866
+ *
867
+ * @return [Fixnum] the regexp "cost"
868
+ */
869
+ static VALUE re2_regexp_program_size(VALUE self) {
870
+ re2_pattern *p;
871
+ Data_Get_Struct(self, re2_pattern, p);
872
+ return INT2FIX(p->pattern->ProgramSize());
873
+ }
782
874
 
783
- /*
784
- * Returns whether or not the regular expression +re2+
785
- * was compiled with the perl_classes option set to true.
786
- *
787
- * @return [Boolean] the perl_classes option
788
- * @example
789
- * re2 = RE2::Regexp.new("woo?", :perl_classes => true)
790
- * re2.perl_classes? #=> true
791
- */
792
- static VALUE re2_regexp_perl_classes(VALUE self) {
793
- re2_pattern *p;
794
- Data_Get_Struct(self, re2_pattern, p);
795
- return BOOL2RUBY(p->pattern->options().perl_classes());
796
- }
875
+ /*
876
+ * Returns a hash of the options currently set for
877
+ * +re2+.
878
+ *
879
+ * @return [Hash] the options
880
+ */
881
+ static VALUE re2_regexp_options(VALUE self) {
882
+ VALUE options;
883
+ re2_pattern *p;
797
884
 
798
- /*
799
- * Returns whether or not the regular expression +re2+
800
- * was compiled with the word_boundary option set to true.
801
- *
802
- * @return [Boolean] the word_boundary option
803
- * @example
804
- * re2 = RE2::Regexp.new("woo?", :word_boundary => true)
805
- * re2.word_boundary? #=> true
806
- */
807
- static VALUE re2_regexp_word_boundary(VALUE self) {
808
- re2_pattern *p;
809
- Data_Get_Struct(self, re2_pattern, p);
810
- return BOOL2RUBY(p->pattern->options().word_boundary());
811
- }
885
+ Data_Get_Struct(self, re2_pattern, p);
886
+ options = rb_hash_new();
812
887
 
813
- /*
814
- * Returns whether or not the regular expression +re2+
815
- * was compiled with the one_line option set to true.
816
- *
817
- * @return [Boolean] the one_line option
818
- * @example
819
- * re2 = RE2::Regexp.new("woo?", :one_line => true)
820
- * re2.one_line? #=> true
821
- */
822
- static VALUE re2_regexp_one_line(VALUE self) {
823
- re2_pattern *p;
824
- Data_Get_Struct(self, re2_pattern, p);
825
- return BOOL2RUBY(p->pattern->options().one_line());
826
- }
888
+ rb_hash_aset(options, ID2SYM(id_utf8),
889
+ BOOL2RUBY(p->pattern->options().utf8()));
827
890
 
828
- /*
829
- * If the RE2 could not be created properly, returns an
830
- * error string otherwise returns nil.
831
- *
832
- * @return [String, nil] the error string or nil
833
- */
834
- static VALUE re2_regexp_error(VALUE self) {
835
- re2_pattern *p;
836
- Data_Get_Struct(self, re2_pattern, p);
837
- if (p->pattern->ok()) {
838
- return Qnil;
839
- } else {
840
- return rb_str_new(p->pattern->error().data(), p->pattern->error().size());
841
- }
842
- }
891
+ rb_hash_aset(options, ID2SYM(id_posix_syntax),
892
+ BOOL2RUBY(p->pattern->options().posix_syntax()));
843
893
 
844
- /*
845
- * If the RE2 could not be created properly, returns
846
- * the offending portion of the regexp otherwise returns nil.
847
- *
848
- * @return [String, nil] the offending portion of the regexp or nil
849
- */
850
- static VALUE re2_regexp_error_arg(VALUE self) {
851
- re2_pattern *p;
852
- Data_Get_Struct(self, re2_pattern, p);
853
- if (p->pattern->ok()) {
854
- return Qnil;
855
- } else {
856
- return ENCODED_STR_NEW(p->pattern->error_arg().data(),
857
- p->pattern->error_arg().size(),
858
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
859
- }
860
- }
894
+ rb_hash_aset(options, ID2SYM(id_longest_match),
895
+ BOOL2RUBY(p->pattern->options().longest_match()));
861
896
 
862
- /*
863
- * Returns the program size, a very approximate measure
864
- * of a regexp's "cost". Larger numbers are more expensive
865
- * than smaller numbers.
866
- *
867
- * @return [Fixnum] the regexp "cost"
868
- */
869
- static VALUE re2_regexp_program_size(VALUE self) {
870
- re2_pattern *p;
871
- Data_Get_Struct(self, re2_pattern, p);
872
- return INT2FIX(p->pattern->ProgramSize());
873
- }
897
+ rb_hash_aset(options, ID2SYM(id_log_errors),
898
+ BOOL2RUBY(p->pattern->options().log_errors()));
899
+
900
+ rb_hash_aset(options, ID2SYM(id_max_mem),
901
+ INT2FIX(p->pattern->options().max_mem()));
874
902
 
875
- /*
876
- * Returns a hash of the options currently set for
877
- * +re2+.
878
- *
879
- * @return [Hash] the options
880
- */
881
- static VALUE re2_regexp_options(VALUE self) {
882
- VALUE options;
883
- re2_pattern *p;
903
+ rb_hash_aset(options, ID2SYM(id_literal),
904
+ BOOL2RUBY(p->pattern->options().literal()));
884
905
 
885
- Data_Get_Struct(self, re2_pattern, p);
886
- options = rb_hash_new();
906
+ rb_hash_aset(options, ID2SYM(id_never_nl),
907
+ BOOL2RUBY(p->pattern->options().never_nl()));
887
908
 
888
- rb_hash_aset(options, ID2SYM(id_utf8),
889
- BOOL2RUBY(p->pattern->options().utf8()));
909
+ rb_hash_aset(options, ID2SYM(id_case_sensitive),
910
+ BOOL2RUBY(p->pattern->options().case_sensitive()));
890
911
 
891
- rb_hash_aset(options, ID2SYM(id_posix_syntax),
892
- BOOL2RUBY(p->pattern->options().posix_syntax()));
912
+ rb_hash_aset(options, ID2SYM(id_perl_classes),
913
+ BOOL2RUBY(p->pattern->options().perl_classes()));
893
914
 
894
- rb_hash_aset(options, ID2SYM(id_longest_match),
895
- BOOL2RUBY(p->pattern->options().longest_match()));
915
+ rb_hash_aset(options, ID2SYM(id_word_boundary),
916
+ BOOL2RUBY(p->pattern->options().word_boundary()));
896
917
 
897
- rb_hash_aset(options, ID2SYM(id_log_errors),
898
- BOOL2RUBY(p->pattern->options().log_errors()));
918
+ rb_hash_aset(options, ID2SYM(id_one_line),
919
+ BOOL2RUBY(p->pattern->options().one_line()));
899
920
 
900
- rb_hash_aset(options, ID2SYM(id_max_mem),
901
- INT2FIX(p->pattern->options().max_mem()));
921
+ /* This is a read-only hash after all... */
922
+ rb_obj_freeze(options);
923
+
924
+ return options;
925
+ }
902
926
 
903
- rb_hash_aset(options, ID2SYM(id_literal),
904
- BOOL2RUBY(p->pattern->options().literal()));
927
+ /*
928
+ * Returns the number of capturing subpatterns, or -1 if the regexp
929
+ * wasn't valid on construction. The overall match ($0) does not
930
+ * count: if the regexp is "(a)(b)", returns 2.
931
+ *
932
+ * @return [Fixnum] the number of capturing subpatterns
933
+ */
934
+ static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
935
+ re2_pattern *p;
905
936
 
906
- rb_hash_aset(options, ID2SYM(id_never_nl),
907
- BOOL2RUBY(p->pattern->options().never_nl()));
937
+ Data_Get_Struct(self, re2_pattern, p);
938
+ return INT2FIX(p->pattern->NumberOfCapturingGroups());
939
+ }
908
940
 
909
- rb_hash_aset(options, ID2SYM(id_case_sensitive),
910
- BOOL2RUBY(p->pattern->options().case_sensitive()));
941
+ /*
942
+ * Returns a hash of names to capturing indices of groups.
943
+ *
944
+ * @return [Hash] a hash of names to capturing indices
945
+ */
946
+ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
947
+ VALUE capturing_groups;
948
+ re2_pattern *p;
949
+ map<string, int> groups;
950
+ map<string, int>::iterator iterator;
951
+
952
+ Data_Get_Struct(self, re2_pattern, p);
953
+ groups = p->pattern->NamedCapturingGroups();
954
+ capturing_groups = rb_hash_new();
955
+
956
+ for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
957
+ rb_hash_aset(capturing_groups,
958
+ ENCODED_STR_NEW(iterator->first.data(), iterator->first.size(),
959
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"),
960
+ INT2FIX(iterator->second));
961
+ }
911
962
 
912
- rb_hash_aset(options, ID2SYM(id_perl_classes),
913
- BOOL2RUBY(p->pattern->options().perl_classes()));
963
+ return capturing_groups;
964
+ }
914
965
 
915
- rb_hash_aset(options, ID2SYM(id_word_boundary),
916
- BOOL2RUBY(p->pattern->options().word_boundary()));
966
+ /*
967
+ * Match the pattern against the given +text+ and return either
968
+ * a boolean (if no submatches are required) or a {RE2::MatchData}
969
+ * instance.
970
+ *
971
+ * @return [Boolean, RE2::MatchData]
972
+ *
973
+ * @overload match(text)
974
+ * Returns an {RE2::MatchData} containing the matching
975
+ * pattern and all subpatterns resulting from looking for
976
+ * the regexp in +text+.
977
+ *
978
+ * @param [String] text the text to search
979
+ * @return [RE2::MatchData] the matches
980
+ * @raise [NoMemoryError] if there was not enough memory to allocate the matches
981
+ * @example
982
+ * r = RE2::Regexp.new('w(o)(o)')
983
+ * r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
984
+ *
985
+ * @overload match(text, 0)
986
+ * Returns either true or false indicating whether a
987
+ * successful match was made.
988
+ *
989
+ * @param [String] text the text to search
990
+ * @return [Boolean] whether the match was successful
991
+ * @raise [NoMemoryError] if there was not enough memory to allocate the matches
992
+ * @example
993
+ * r = RE2::Regexp.new('w(o)(o)')
994
+ * r.match('woo', 0) #=> true
995
+ * r.match('bob', 0) #=> false
996
+ *
997
+ * @overload match(text, number_of_matches)
998
+ * See +match(text)+ but with a specific number of
999
+ * matches returned (padded with nils if necessary).
1000
+ *
1001
+ * @param [String] text the text to search
1002
+ * @param [Fixnum] number_of_matches the number of matches to return
1003
+ * @return [RE2::MatchData] the matches
1004
+ * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1005
+ * @example
1006
+ * r = RE2::Regexp.new('w(o)(o)')
1007
+ * r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
1008
+ * r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
1009
+ */
1010
+ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1011
+ int n;
1012
+ bool matched;
1013
+ re2_pattern *p;
1014
+ re2_matchdata *m;
1015
+ VALUE text, number_of_matches, matchdata;
917
1016
 
918
- rb_hash_aset(options, ID2SYM(id_one_line),
919
- BOOL2RUBY(p->pattern->options().one_line()));
1017
+ rb_scan_args(argc, argv, "11", &text, &number_of_matches);
920
1018
 
921
- /* This is a read-only hash after all... */
922
- rb_obj_freeze(options);
1019
+ /* Ensure text is a string. */
1020
+ text = StringValue(text);
923
1021
 
924
- return options;
925
- }
1022
+ Data_Get_Struct(self, re2_pattern, p);
926
1023
 
927
- /*
928
- * Returns the number of capturing subpatterns, or -1 if the regexp
929
- * wasn't valid on construction. The overall match ($0) does not
930
- * count: if the regexp is "(a)(b)", returns 2.
931
- *
932
- * @return [Fixnum] the number of capturing subpatterns
933
- */
934
- static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
935
- re2_pattern *p;
936
-
937
- Data_Get_Struct(self, re2_pattern, p);
938
- return INT2FIX(p->pattern->NumberOfCapturingGroups());
1024
+ if (RTEST(number_of_matches)) {
1025
+ n = NUM2INT(number_of_matches);
1026
+ } else {
1027
+ n = p->pattern->NumberOfCapturingGroups();
939
1028
  }
940
1029
 
941
- /*
942
- * Returns a hash of names to capturing indices of groups.
943
- *
944
- * @return [Hash] a hash of names to capturing indices
945
- */
946
- static VALUE re2_regexp_named_capturing_groups(VALUE self) {
947
- VALUE capturing_groups;
948
- re2_pattern *p;
949
- map<string, int> groups;
950
- map<string, int>::iterator iterator;
951
-
952
- Data_Get_Struct(self, re2_pattern, p);
953
- groups = p->pattern->NamedCapturingGroups();
954
- capturing_groups = rb_hash_new();
955
-
956
- for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
957
- rb_hash_aset(capturing_groups,
958
- ENCODED_STR_NEW(iterator->first.data(), iterator->first.size(),
959
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"),
960
- INT2FIX(iterator->second));
1030
+ if (n == 0) {
1031
+ matched = match(p->pattern, StringValuePtr(text), 0,
1032
+ static_cast<int>(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0);
1033
+ return BOOL2RUBY(matched);
1034
+ } else {
1035
+
1036
+ /* Because match returns the whole match as well. */
1037
+ n += 1;
1038
+
1039
+ matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1040
+ Data_Get_Struct(matchdata, re2_matchdata, m);
1041
+ m->matches = new(nothrow) re2::StringPiece[n];
1042
+ m->regexp = self;
1043
+ m->text = rb_str_dup(text);
1044
+ rb_str_freeze(m->text);
1045
+
1046
+ if (m->matches == 0) {
1047
+ rb_raise(rb_eNoMemError,
1048
+ "not enough memory to allocate StringPieces for matches");
961
1049
  }
962
1050
 
963
- return capturing_groups;
964
- }
1051
+ m->number_of_matches = n;
1052
+
1053
+ matched = match(p->pattern, StringValuePtr(text), 0,
1054
+ static_cast<int>(RSTRING_LEN(text)),
1055
+ RE2::UNANCHORED, m->matches, n);
965
1056
 
966
- /*
967
- * Match the pattern against the given +text+ and return either
968
- * a boolean (if no submatches are required) or a {RE2::MatchData}
969
- * instance.
970
- *
971
- * @return [Boolean, RE2::MatchData]
972
- *
973
- * @overload match(text)
974
- * Returns an {RE2::MatchData} containing the matching
975
- * pattern and all subpatterns resulting from looking for
976
- * the regexp in +text+.
977
- *
978
- * @param [String] text the text to search
979
- * @return [RE2::MatchData] the matches
980
- * @raise [NoMemoryError] if there was not enough memory to allocate the matches
981
- * @example
982
- * r = RE2::Regexp.new('w(o)(o)')
983
- * r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
984
- *
985
- * @overload match(text, 0)
986
- * Returns either true or false indicating whether a
987
- * successful match was made.
988
- *
989
- * @param [String] text the text to search
990
- * @return [Boolean] whether the match was successful
991
- * @raise [NoMemoryError] if there was not enough memory to allocate the matches
992
- * @example
993
- * r = RE2::Regexp.new('w(o)(o)')
994
- * r.match('woo', 0) #=> true
995
- * r.match('bob', 0) #=> false
996
- *
997
- * @overload match(text, number_of_matches)
998
- * See +match(text)+ but with a specific number of
999
- * matches returned (padded with nils if necessary).
1000
- *
1001
- * @param [String] text the text to search
1002
- * @param [Fixnum] number_of_matches the number of matches to return
1003
- * @return [RE2::MatchData] the matches
1004
- * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1005
- * @example
1006
- * r = RE2::Regexp.new('w(o)(o)')
1007
- * r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
1008
- * r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
1009
- */
1010
- static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1011
- int n;
1012
- bool matched;
1013
- re2_pattern *p;
1014
- re2_matchdata *m;
1015
- VALUE text, number_of_matches, matchdata;
1016
-
1017
- rb_scan_args(argc, argv, "11", &text, &number_of_matches);
1018
-
1019
- Data_Get_Struct(self, re2_pattern, p);
1020
-
1021
- if (RTEST(number_of_matches)) {
1022
- n = NUM2INT(number_of_matches);
1057
+ if (matched) {
1058
+ return matchdata;
1023
1059
  } else {
1024
- n = p->pattern->NumberOfCapturingGroups();
1060
+ return Qnil;
1025
1061
  }
1062
+ }
1063
+ }
1026
1064
 
1027
- if (n == 0) {
1028
- matched = match(p->pattern, StringValuePtr(text), 0,
1029
- static_cast<int>(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0);
1030
- return BOOL2RUBY(matched);
1031
- } else {
1065
+ /*
1066
+ * Returns true or false to indicate a successful match.
1067
+ * Equivalent to +re2.match(text, 0)+.
1068
+ *
1069
+ * @return [Boolean] whether the match was successful
1070
+ */
1071
+ static VALUE re2_regexp_match_query(VALUE self, VALUE text) {
1072
+ VALUE argv[2];
1073
+ argv[0] = text;
1074
+ argv[1] = INT2FIX(0);
1032
1075
 
1033
- /* Because match returns the whole match as well. */
1034
- n += 1;
1076
+ return re2_regexp_match(2, argv, self);
1077
+ }
1035
1078
 
1036
- matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1037
- Data_Get_Struct(matchdata, re2_matchdata, m);
1038
- m->matches = new(nothrow) re2::StringPiece[n];
1039
- m->regexp = self;
1040
- m->text = rb_str_dup(text);
1041
- rb_str_freeze(m->text);
1079
+ /*
1080
+ * Returns a {RE2::Scanner} for scanning the given text incrementally.
1081
+ *
1082
+ * @example
1083
+ * c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
1084
+ */
1085
+ static VALUE re2_regexp_scan(VALUE self, VALUE text) {
1086
+ re2_pattern *p;
1087
+ re2_scanner *c;
1088
+ VALUE scanner;
1042
1089
 
1043
- if (m->matches == 0) {
1044
- rb_raise(rb_eNoMemError,
1045
- "not enough memory to allocate StringPieces for matches");
1046
- }
1090
+ Data_Get_Struct(self, re2_pattern, p);
1091
+ scanner = rb_class_new_instance(0, 0, re2_cScanner);
1092
+ Data_Get_Struct(scanner, re2_scanner, c);
1047
1093
 
1048
- m->number_of_matches = n;
1094
+ c->input = new(nothrow) re2::StringPiece(StringValuePtr(text));
1095
+ c->regexp = self;
1096
+ c->text = text;
1097
+ c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
1049
1098
 
1050
- matched = match(p->pattern, StringValuePtr(text), 0,
1051
- static_cast<int>(RSTRING_LEN(text)),
1052
- RE2::UNANCHORED, m->matches, n);
1099
+ return scanner;
1100
+ }
1053
1101
 
1054
- if (matched) {
1055
- return matchdata;
1056
- } else {
1057
- return Qnil;
1058
- }
1059
- }
1060
- }
1102
+ /*
1103
+ * Returns a copy of +str+ with the first occurrence of +pattern+
1104
+ * replaced with +rewrite+.
1105
+ *
1106
+ * @param [String] str the string to modify
1107
+ * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1108
+ * @param [String] rewrite the string to replace with
1109
+ * @return [String] the resulting string
1110
+ * @example
1111
+ * RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
1112
+ * re2 = RE2::Regexp.new("hel+o")
1113
+ * RE2.Replace("hello there", re2, "yo") #=> "yo there"
1114
+ */
1115
+ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
1116
+ VALUE rewrite) {
1117
+ UNUSED(self);
1118
+ re2_pattern *p;
1061
1119
 
1062
- /*
1063
- * Returns true or false to indicate a successful match.
1064
- * Equivalent to +re2.match(text, 0)+.
1065
- *
1066
- * @return [Boolean] whether the match was successful
1067
- */
1068
- static VALUE re2_regexp_match_query(VALUE self, VALUE text) {
1069
- VALUE argv[2];
1070
- argv[0] = text;
1071
- argv[1] = INT2FIX(0);
1072
-
1073
- return re2_regexp_match(2, argv, self);
1074
- }
1120
+ /* Convert all the inputs to be pumped into RE2::Replace. */
1121
+ string str_as_string(StringValuePtr(str));
1075
1122
 
1076
- /*
1077
- * Returns a {RE2::Scanner} for scanning the given text incrementally.
1078
- *
1079
- * @example
1080
- * c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
1081
- */
1082
- static VALUE re2_regexp_scan(VALUE self, VALUE text) {
1083
- re2_pattern *p;
1084
- re2_scanner *c;
1085
- VALUE scanner;
1086
-
1087
- Data_Get_Struct(self, re2_pattern, p);
1088
- scanner = rb_class_new_instance(0, 0, re2_cScanner);
1089
- Data_Get_Struct(scanner, re2_scanner, c);
1090
-
1091
- c->input = new(nothrow) re2::StringPiece(StringValuePtr(text));
1092
- c->regexp = self;
1093
- c->text = text;
1094
- c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
1095
-
1096
- return scanner;
1123
+ /* Do the replacement. */
1124
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1125
+ Data_Get_Struct(pattern, re2_pattern, p);
1126
+ RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1127
+
1128
+ return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1129
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
1130
+ } else {
1131
+ RE2::Replace(&str_as_string, StringValuePtr(pattern),
1132
+ StringValuePtr(rewrite));
1133
+
1134
+ return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(),
1135
+ pattern);
1097
1136
  }
1098
1137
 
1099
- /*
1100
- * Returns a copy of +str+ with the first occurrence +pattern+
1101
- * replaced with +rewrite+.
1102
- *
1103
- * @param [String] str the string to modify
1104
- * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1105
- * @param [String] rewrite the string to replace with
1106
- * @return [String] the resulting string
1107
- * @example
1108
- * RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
1109
- * re2 = RE2.new("hel+o")
1110
- * RE2.Replace("hello there", re2, "yo") #=> "yo there"
1111
- */
1112
- static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
1113
- VALUE rewrite) {
1114
- UNUSED(self);
1115
- re2_pattern *p;
1116
-
1117
- /* Convert all the inputs to be pumped into RE2::Replace. */
1118
- string str_as_string(StringValuePtr(str));
1119
-
1120
- /* Do the replacement. */
1121
- if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1122
- Data_Get_Struct(pattern, re2_pattern, p);
1123
- RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1124
-
1125
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1126
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
1127
- } else {
1128
- RE2::Replace(&str_as_string, StringValuePtr(pattern),
1129
- StringValuePtr(rewrite));
1138
+ }
1130
1139
 
1131
- return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(),
1132
- pattern);
1133
- }
1140
+ /*
1141
+ * Return a copy of +str+ with every occurrence of +pattern+ replaced by +rewrite+.
1142
+ *
1143
+ * @param [String] str the string to modify
1144
+ * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1145
+ * @param [String] rewrite the string to replace with
1146
+ * @return [String] the resulting string
1147
+ * @example
1148
+ * re2 = RE2::Regexp.new("oo?")
1149
+ * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
1150
+ * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
1151
+ */
1152
+ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
1153
+ VALUE rewrite) {
1154
+ UNUSED(self);
1134
1155
 
1135
- }
1156
+ /* Convert all the inputs to be pumped into RE2::GlobalReplace. */
1157
+ re2_pattern *p;
1158
+ string str_as_string(StringValuePtr(str));
1136
1159
 
1137
- /*
1138
- * Return a copy of +str+ with +pattern+ replaced by +rewrite+.
1139
- *
1140
- * @param [String] str the string to modify
1141
- * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1142
- * @param [String] rewrite the string to replace with
1143
- * @return [String] the resulting string
1144
- * @example
1145
- * re2 = RE2.new("oo?")
1146
- * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
1147
- * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
1148
- */
1149
- static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
1150
- VALUE rewrite) {
1151
- UNUSED(self);
1152
-
1153
- /* Convert all the inputs to be pumped into RE2::GlobalReplace. */
1154
- re2_pattern *p;
1155
- string str_as_string(StringValuePtr(str));
1156
-
1157
- /* Do the replacement. */
1158
- if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1159
- Data_Get_Struct(pattern, re2_pattern, p);
1160
- RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1161
-
1162
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1163
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
1164
- } else {
1165
- RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
1166
- StringValuePtr(rewrite));
1160
+ /* Do the replacement. */
1161
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1162
+ Data_Get_Struct(pattern, re2_pattern, p);
1163
+ RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1167
1164
 
1168
- return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(),
1169
- pattern);
1170
- }
1171
- }
1165
+ return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1166
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
1167
+ } else {
1168
+ RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
1169
+ StringValuePtr(rewrite));
1172
1170
 
1173
- /*
1174
- * Returns a version of str with all potentially meaningful regexp
1175
- * characters escaped. The returned string, used as a regular
1176
- * expression, will exactly match the original string.
1177
- *
1178
- * @param [String] unquoted the unquoted string
1179
- * @return [String] the escaped string
1180
- * @example
1181
- * RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
1182
- */
1183
- static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
1184
- UNUSED(self);
1185
- string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
1186
- return rb_str_new(quoted_string.data(), quoted_string.size());
1171
+ return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(),
1172
+ pattern);
1187
1173
  }
1174
+ }
1188
1175
 
1189
- void Init_re2(void) {
1190
- re2_mRE2 = rb_define_module("RE2");
1191
- re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
1192
- re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
1193
- re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
1194
-
1195
- rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
1196
- rb_define_alloc_func(re2_cMatchData,
1197
- (VALUE (*)(VALUE))re2_matchdata_allocate);
1198
- rb_define_alloc_func(re2_cScanner,
1199
- (VALUE (*)(VALUE))re2_scanner_allocate);
1200
-
1201
- rb_define_method(re2_cMatchData, "string",
1202
- RUBY_METHOD_FUNC(re2_matchdata_string), 0);
1203
- rb_define_method(re2_cMatchData, "regexp",
1204
- RUBY_METHOD_FUNC(re2_matchdata_regexp), 0);
1205
- rb_define_method(re2_cMatchData, "to_a",
1206
- RUBY_METHOD_FUNC(re2_matchdata_to_a), 0);
1207
- rb_define_method(re2_cMatchData, "size",
1208
- RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1209
- rb_define_method(re2_cMatchData, "length",
1210
- RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1211
- rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
1212
- -1); rb_define_method(re2_cMatchData, "to_s",
1213
- RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
1214
- rb_define_method(re2_cMatchData, "inspect",
1215
- RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
1216
-
1217
- rb_define_method(re2_cScanner, "string",
1218
- RUBY_METHOD_FUNC(re2_scanner_string), 0);
1219
- rb_define_method(re2_cScanner, "regexp",
1220
- RUBY_METHOD_FUNC(re2_scanner_regexp), 0);
1221
- rb_define_method(re2_cScanner, "scan",
1222
- RUBY_METHOD_FUNC(re2_scanner_scan), 0);
1223
- rb_define_method(re2_cScanner, "rewind",
1224
- RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
1225
-
1226
- rb_define_method(re2_cRegexp, "initialize",
1227
- RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
1228
- rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
1229
- rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
1230
- 0);
1231
- rb_define_method(re2_cRegexp, "error_arg",
1232
- RUBY_METHOD_FUNC(re2_regexp_error_arg), 0);
1233
- rb_define_method(re2_cRegexp, "program_size",
1234
- RUBY_METHOD_FUNC(re2_regexp_program_size), 0);
1235
- rb_define_method(re2_cRegexp, "options",
1236
- RUBY_METHOD_FUNC(re2_regexp_options), 0);
1237
- rb_define_method(re2_cRegexp, "number_of_capturing_groups",
1238
- RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
1239
- rb_define_method(re2_cRegexp, "named_capturing_groups",
1240
- RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
1241
- rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
1242
- -1);
1243
- rb_define_method(re2_cRegexp, "match?",
1244
- RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1245
- rb_define_method(re2_cRegexp, "=~",
1246
- RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1247
- rb_define_method(re2_cRegexp, "===",
1248
- RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1249
- rb_define_method(re2_cRegexp, "scan",
1250
- RUBY_METHOD_FUNC(re2_regexp_scan), 1);
1251
- rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
1252
- rb_define_method(re2_cRegexp, "to_str", RUBY_METHOD_FUNC(re2_regexp_to_s),
1253
- 0);
1254
- rb_define_method(re2_cRegexp, "pattern", RUBY_METHOD_FUNC(re2_regexp_to_s),
1255
- 0);
1256
- rb_define_method(re2_cRegexp, "source", RUBY_METHOD_FUNC(re2_regexp_to_s),
1257
- 0);
1258
- rb_define_method(re2_cRegexp, "inspect",
1259
- RUBY_METHOD_FUNC(re2_regexp_inspect), 0);
1260
- rb_define_method(re2_cRegexp, "utf8?", RUBY_METHOD_FUNC(re2_regexp_utf8),
1261
- 0);
1262
- rb_define_method(re2_cRegexp, "posix_syntax?",
1263
- RUBY_METHOD_FUNC(re2_regexp_posix_syntax), 0);
1264
- rb_define_method(re2_cRegexp, "longest_match?",
1265
- RUBY_METHOD_FUNC(re2_regexp_longest_match), 0);
1266
- rb_define_method(re2_cRegexp, "log_errors?",
1267
- RUBY_METHOD_FUNC(re2_regexp_log_errors), 0);
1268
- rb_define_method(re2_cRegexp, "max_mem",
1269
- RUBY_METHOD_FUNC(re2_regexp_max_mem), 0);
1270
- rb_define_method(re2_cRegexp, "literal?",
1271
- RUBY_METHOD_FUNC(re2_regexp_literal), 0);
1272
- rb_define_method(re2_cRegexp, "never_nl?",
1273
- RUBY_METHOD_FUNC(re2_regexp_never_nl), 0);
1274
- rb_define_method(re2_cRegexp, "case_sensitive?",
1275
- RUBY_METHOD_FUNC(re2_regexp_case_sensitive), 0);
1276
- rb_define_method(re2_cRegexp, "case_insensitive?",
1277
- RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
1278
- rb_define_method(re2_cRegexp, "casefold?",
1279
- RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
1280
- rb_define_method(re2_cRegexp, "perl_classes?",
1281
- RUBY_METHOD_FUNC(re2_regexp_perl_classes), 0);
1282
- rb_define_method(re2_cRegexp, "word_boundary?",
1283
- RUBY_METHOD_FUNC(re2_regexp_word_boundary), 0);
1284
- rb_define_method(re2_cRegexp, "one_line?",
1285
- RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
1286
-
1287
- rb_define_module_function(re2_mRE2, "Replace",
1288
- RUBY_METHOD_FUNC(re2_Replace), 3);
1289
- rb_define_module_function(re2_mRE2, "GlobalReplace",
1290
- RUBY_METHOD_FUNC(re2_GlobalReplace), 3);
1291
- rb_define_module_function(re2_mRE2, "QuoteMeta",
1292
- RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1293
- rb_define_singleton_method(re2_cRegexp, "escape",
1294
- RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1295
- rb_define_singleton_method(re2_cRegexp, "quote",
1296
- RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1297
- rb_define_singleton_method(re2_cRegexp, "compile",
1298
- RUBY_METHOD_FUNC(rb_class_new_instance), -1);
1299
-
1300
- rb_define_global_function("RE2", RUBY_METHOD_FUNC(re2_re2), -1);
1301
-
1302
- /* Create the symbols used in options. */
1303
- id_utf8 = rb_intern("utf8");
1304
- id_posix_syntax = rb_intern("posix_syntax");
1305
- id_longest_match = rb_intern("longest_match");
1306
- id_log_errors = rb_intern("log_errors");
1307
- id_max_mem = rb_intern("max_mem");
1308
- id_literal = rb_intern("literal");
1309
- id_never_nl = rb_intern("never_nl");
1310
- id_case_sensitive = rb_intern("case_sensitive");
1311
- id_perl_classes = rb_intern("perl_classes");
1312
- id_word_boundary = rb_intern("word_boundary");
1313
- id_one_line = rb_intern("one_line");
1314
-
1315
- #if 0
1316
- /* Fake so YARD generates the file. */
1317
- rb_mKernel = rb_define_module("Kernel");
1318
- #endif
1319
- }
1176
+ /*
1177
+ * Returns a version of str with all potentially meaningful regexp
1178
+ * characters escaped. The returned string, used as a regular
1179
+ * expression, will exactly match the original string.
1180
+ *
1181
+ * @param [String] unquoted the unquoted string
1182
+ * @return [String] the escaped string
1183
+ * @example
1184
+ * RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
1185
+ */
1186
+ static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
1187
+ UNUSED(self);
1188
+ string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
1189
+ return rb_str_new(quoted_string.data(), quoted_string.size());
1190
+ }
1191
+
1192
+ /* Forward declare Init_re2 to be called by C code but define it separately so
1193
+ * that YARD can parse it.
1194
+ */
1195
+ extern "C" void Init_re2(void);
1196
+
1197
+ void Init_re2(void) {
1198
+ re2_mRE2 = rb_define_module("RE2");
1199
+ re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
1200
+ re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
1201
+ re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
1202
+
1203
+ rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
1204
+ rb_define_alloc_func(re2_cMatchData,
1205
+ (VALUE (*)(VALUE))re2_matchdata_allocate);
1206
+ rb_define_alloc_func(re2_cScanner,
1207
+ (VALUE (*)(VALUE))re2_scanner_allocate);
1208
+
1209
+ rb_define_method(re2_cMatchData, "string",
1210
+ RUBY_METHOD_FUNC(re2_matchdata_string), 0);
1211
+ rb_define_method(re2_cMatchData, "regexp",
1212
+ RUBY_METHOD_FUNC(re2_matchdata_regexp), 0);
1213
+ rb_define_method(re2_cMatchData, "to_a",
1214
+ RUBY_METHOD_FUNC(re2_matchdata_to_a), 0);
1215
+ rb_define_method(re2_cMatchData, "size",
1216
+ RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1217
+ rb_define_method(re2_cMatchData, "length",
1218
+ RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1219
+ rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
1220
+ -1); rb_define_method(re2_cMatchData, "to_s",
1221
+ RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
1222
+ rb_define_method(re2_cMatchData, "inspect",
1223
+ RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
1224
+
1225
+ rb_define_method(re2_cScanner, "string",
1226
+ RUBY_METHOD_FUNC(re2_scanner_string), 0);
1227
+ rb_define_method(re2_cScanner, "regexp",
1228
+ RUBY_METHOD_FUNC(re2_scanner_regexp), 0);
1229
+ rb_define_method(re2_cScanner, "scan",
1230
+ RUBY_METHOD_FUNC(re2_scanner_scan), 0);
1231
+ rb_define_method(re2_cScanner, "rewind",
1232
+ RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
1233
+
1234
+ rb_define_method(re2_cRegexp, "initialize",
1235
+ RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
1236
+ rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
1237
+ rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
1238
+ 0);
1239
+ rb_define_method(re2_cRegexp, "error_arg",
1240
+ RUBY_METHOD_FUNC(re2_regexp_error_arg), 0);
1241
+ rb_define_method(re2_cRegexp, "program_size",
1242
+ RUBY_METHOD_FUNC(re2_regexp_program_size), 0);
1243
+ rb_define_method(re2_cRegexp, "options",
1244
+ RUBY_METHOD_FUNC(re2_regexp_options), 0);
1245
+ rb_define_method(re2_cRegexp, "number_of_capturing_groups",
1246
+ RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
1247
+ rb_define_method(re2_cRegexp, "named_capturing_groups",
1248
+ RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
1249
+ rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
1250
+ -1);
1251
+ rb_define_method(re2_cRegexp, "match?",
1252
+ RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1253
+ rb_define_method(re2_cRegexp, "=~",
1254
+ RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1255
+ rb_define_method(re2_cRegexp, "===",
1256
+ RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1257
+ rb_define_method(re2_cRegexp, "scan",
1258
+ RUBY_METHOD_FUNC(re2_regexp_scan), 1);
1259
+ rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
1260
+ rb_define_method(re2_cRegexp, "to_str", RUBY_METHOD_FUNC(re2_regexp_to_s),
1261
+ 0);
1262
+ rb_define_method(re2_cRegexp, "pattern", RUBY_METHOD_FUNC(re2_regexp_to_s),
1263
+ 0);
1264
+ rb_define_method(re2_cRegexp, "source", RUBY_METHOD_FUNC(re2_regexp_to_s),
1265
+ 0);
1266
+ rb_define_method(re2_cRegexp, "inspect",
1267
+ RUBY_METHOD_FUNC(re2_regexp_inspect), 0);
1268
+ rb_define_method(re2_cRegexp, "utf8?", RUBY_METHOD_FUNC(re2_regexp_utf8),
1269
+ 0);
1270
+ rb_define_method(re2_cRegexp, "posix_syntax?",
1271
+ RUBY_METHOD_FUNC(re2_regexp_posix_syntax), 0);
1272
+ rb_define_method(re2_cRegexp, "longest_match?",
1273
+ RUBY_METHOD_FUNC(re2_regexp_longest_match), 0);
1274
+ rb_define_method(re2_cRegexp, "log_errors?",
1275
+ RUBY_METHOD_FUNC(re2_regexp_log_errors), 0);
1276
+ rb_define_method(re2_cRegexp, "max_mem",
1277
+ RUBY_METHOD_FUNC(re2_regexp_max_mem), 0);
1278
+ rb_define_method(re2_cRegexp, "literal?",
1279
+ RUBY_METHOD_FUNC(re2_regexp_literal), 0);
1280
+ rb_define_method(re2_cRegexp, "never_nl?",
1281
+ RUBY_METHOD_FUNC(re2_regexp_never_nl), 0);
1282
+ rb_define_method(re2_cRegexp, "case_sensitive?",
1283
+ RUBY_METHOD_FUNC(re2_regexp_case_sensitive), 0);
1284
+ rb_define_method(re2_cRegexp, "case_insensitive?",
1285
+ RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
1286
+ rb_define_method(re2_cRegexp, "casefold?",
1287
+ RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
1288
+ rb_define_method(re2_cRegexp, "perl_classes?",
1289
+ RUBY_METHOD_FUNC(re2_regexp_perl_classes), 0);
1290
+ rb_define_method(re2_cRegexp, "word_boundary?",
1291
+ RUBY_METHOD_FUNC(re2_regexp_word_boundary), 0);
1292
+ rb_define_method(re2_cRegexp, "one_line?",
1293
+ RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
1294
+
1295
+ rb_define_module_function(re2_mRE2, "Replace",
1296
+ RUBY_METHOD_FUNC(re2_Replace), 3);
1297
+ rb_define_module_function(re2_mRE2, "GlobalReplace",
1298
+ RUBY_METHOD_FUNC(re2_GlobalReplace), 3);
1299
+ rb_define_module_function(re2_mRE2, "QuoteMeta",
1300
+ RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1301
+ rb_define_singleton_method(re2_cRegexp, "escape",
1302
+ RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1303
+ rb_define_singleton_method(re2_cRegexp, "quote",
1304
+ RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1305
+ rb_define_singleton_method(re2_cRegexp, "compile",
1306
+ RUBY_METHOD_FUNC(rb_class_new_instance), -1);
1307
+
1308
+ rb_define_global_function("RE2", RUBY_METHOD_FUNC(re2_re2), -1);
1309
+
1310
+ /* Create the symbols used in options. */
1311
+ id_utf8 = rb_intern("utf8");
1312
+ id_posix_syntax = rb_intern("posix_syntax");
1313
+ id_longest_match = rb_intern("longest_match");
1314
+ id_log_errors = rb_intern("log_errors");
1315
+ id_max_mem = rb_intern("max_mem");
1316
+ id_literal = rb_intern("literal");
1317
+ id_never_nl = rb_intern("never_nl");
1318
+ id_case_sensitive = rb_intern("case_sensitive");
1319
+ id_perl_classes = rb_intern("perl_classes");
1320
+ id_word_boundary = rb_intern("word_boundary");
1321
+ id_one_line = rb_intern("one_line");
1322
+
1323
+ #if 0
1324
+ /* Fake so YARD generates the file. */
1325
+ rb_mKernel = rb_define_module("Kernel");
1326
+ #endif
1320
1327
  }