re2 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +5 -2
  3. data/Rakefile +4 -0
  4. data/ext/re2/re2.cc +1168 -1161
  5. data/spec/re2/regexp_spec.rb +4 -0
  6. metadata +12 -11
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dbde479e41f910dff23edbc656cbc6ab092c322c
4
- data.tar.gz: c69628eea5d97fae581078584353b3125b06134d
3
+ metadata.gz: 44eacddc1b64782c61eff633b15f40db1ba3b9d9
4
+ data.tar.gz: 377ae25429bb8804c03a951d0150097d11cb808e
5
5
  SHA512:
6
- metadata.gz: fbceef56880f497c8d09da21123a12056ba5bc4c351bc7e49f88b657b323fc7e9d775bb1e48836222941954ffd356f6709348eb6d9b3f7bdfa0899b77e1d0e57
7
- data.tar.gz: f5a1724a484d227cb9499611c285ceeb60fbf27e99b6cfe7a67c71eb3d7272745740eb79e25a5ba828cb25ec667cc51778429a26eca95b6569d94f814c0a48b3
6
+ metadata.gz: 1f09c136bb17bfabfe739882cc61c7f0bf50dfba140aec66db0a6e2962de0d25e844804812137b5c3e798a61bb2b8002d83fc4d7766bc7fc52ce159535634b4e
7
+ data.tar.gz: f9742540b13859de929a299ac28a646d305e4a72bcffdd0588dad268f53cb0135f440a1f914cd7504a3b01eb5bf9c569c54c83cfaa4ba0b7f131326ef01a0212
data/README.md CHANGED
@@ -1,9 +1,12 @@
1
- re2 [![Build Status](https://secure.travis-ci.org/mudge/re2.png?branch=master)](http://travis-ci.org/mudge/re2)
1
+ re2 [![Build Status](https://travis-ci.org/mudge/re2.svg?branch=master)](http://travis-ci.org/mudge/re2)
2
2
  ===
3
3
 
4
4
  A Ruby binding to [re2][], an "efficient, principled regular expression
5
5
  library".
6
6
 
7
+ **Current version:** 0.6.1
8
+ **Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0.0, 2.1.0, Rubinius 2.2
9
+
7
10
  Installation
8
11
  ------------
9
12
 
@@ -158,7 +161,7 @@ All feedback should go to the mailing list: <mailto:ruby.re2@librelist.com>
158
161
  [ruby-dev]: http://packages.debian.org/ruby-dev
159
162
  [build-essential]: http://packages.debian.org/build-essential
160
163
  [Regexp]: http://ruby-doc.org/core/classes/Regexp.html
161
- [MatchData]: http://ruby-doc.org/core/classes/MatchData.html
164
+ [MatchData]: http://ruby-doc.org/core/classes/MatchData.html
162
165
  [Homebrew]: http://mxcl.github.com/homebrew
163
166
  [libre2-dev]: http://packages.debian.org/search?keywords=libre2-dev
164
167
  [official syntax page]: http://code.google.com/p/re2/wiki/Syntax
data/Rakefile CHANGED
@@ -9,6 +9,10 @@ Rake::TestTask.new do |t|
9
9
  t.verbose = true
10
10
  end
11
11
 
12
+ task :valgrind do
13
+ system "valgrind --tool=memcheck --leak-check=full --show-reachable=no --num-callers=15 --track-fds=yes --workaround-gcc296-bugs=yes --max-stackframe=7304328 --dsymutil=yes --track-origins=yes --log-file=report.txt ruby spec/leak.rb"
14
+ end
15
+
12
16
  task :test => :compile
13
17
  task :spec => :test
14
18
  task :default => :test
@@ -17,1304 +17,1311 @@ using std::nothrow;
17
17
  using std::map;
18
18
  using std::vector;
19
19
 
20
- extern "C" {
21
- #ifdef HAVE_RUBY_ENCODING_H
22
- #include <ruby/encoding.h>
23
- #define ENCODED_STR_NEW(str, length, encoding) \
24
- ({ \
25
- VALUE _string = rb_str_new(str, length); \
26
- int _enc = rb_enc_find_index(encoding); \
27
- rb_enc_associate_index(_string, _enc); \
28
- _string; \
29
- })
30
- #define ENCODED_STR_NEW2(str, length, str2) \
31
- ({ \
32
- VALUE _string = rb_str_new(str, length); \
33
- int _enc = rb_enc_get_index(str2); \
34
- rb_enc_associate_index(_string, _enc); \
35
- _string; \
36
- })
37
- #else
38
- #define ENCODED_STR_NEW(str, length, encoding) \
39
- rb_str_new((const char *)str, (long)length)
40
- #define ENCODED_STR_NEW2(str, length, str2) \
41
- rb_str_new((const char *)str, (long)length)
42
- #endif
20
+ #ifdef HAVE_RUBY_ENCODING_H
21
+ #include <ruby/encoding.h>
22
+ #define ENCODED_STR_NEW(str, length, encoding) \
23
+ ({ \
24
+ VALUE _string = rb_str_new(str, length); \
25
+ int _enc = rb_enc_find_index(encoding); \
26
+ rb_enc_associate_index(_string, _enc); \
27
+ _string; \
28
+ })
29
+ #define ENCODED_STR_NEW2(str, length, str2) \
30
+ ({ \
31
+ VALUE _string = rb_str_new(str, length); \
32
+ int _enc = rb_enc_get_index(str2); \
33
+ rb_enc_associate_index(_string, _enc); \
34
+ _string; \
35
+ })
36
+ #else
37
+ #define ENCODED_STR_NEW(str, length, encoding) \
38
+ rb_str_new((const char *)str, (long)length)
39
+ #define ENCODED_STR_NEW2(str, length, str2) \
40
+ rb_str_new((const char *)str, (long)length)
41
+ #endif
42
+
43
+ #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
44
+ #define UNUSED(x) ((void)x)
45
+
46
+ #ifndef RSTRING_LEN
47
+ #define RSTRING_LEN(x) (RSTRING(x)->len)
48
+ #endif
49
+
50
+ #ifndef RSTRING_PTR
51
+ #define RSTRING_PTR(x) (RSTRING(x)->ptr)
52
+ #endif
53
+
54
+ #ifdef HAVE_ENDPOS_ARGUMENT
55
+ #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
56
+ (pattern->Match(text, startpos, endpos, anchor, match, nmatch))
57
+ #else
58
+ #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
59
+ (pattern->Match(text, startpos, anchor, match, nmatch))
60
+ #endif
61
+
62
+ typedef struct {
63
+ RE2 *pattern;
64
+ } re2_pattern;
65
+
66
+ typedef struct {
67
+ re2::StringPiece *matches;
68
+ int number_of_matches;
69
+ VALUE regexp, text;
70
+ } re2_matchdata;
71
+
72
+ typedef struct {
73
+ re2::StringPiece *input;
74
+ int number_of_capturing_groups;
75
+ VALUE regexp, text;
76
+ } re2_scanner;
77
+
78
+ VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner;
79
+
80
+ /* Symbols used in RE2 options. */
81
+ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
82
+ id_max_mem, id_literal, id_never_nl, id_case_sensitive,
83
+ id_perl_classes, id_word_boundary, id_one_line;
84
+
85
+ void re2_matchdata_mark(re2_matchdata* self) {
86
+ rb_gc_mark(self->regexp);
87
+ rb_gc_mark(self->text);
88
+ }
43
89
 
44
- #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
45
- #define UNUSED(x) ((void)x)
90
+ void re2_matchdata_free(re2_matchdata* self) {
91
+ if (self->matches) {
92
+ delete[] self->matches;
93
+ }
94
+ free(self);
95
+ }
46
96
 
47
- #ifndef RSTRING_LEN
48
- #define RSTRING_LEN(x) (RSTRING(x)->len)
49
- #endif
97
+ void re2_scanner_mark(re2_scanner* self) {
98
+ rb_gc_mark(self->regexp);
99
+ rb_gc_mark(self->text);
100
+ }
50
101
 
51
- #ifndef RSTRING_PTR
52
- #define RSTRING_PTR(x) (RSTRING(x)->ptr)
53
- #endif
102
+ void re2_scanner_free(re2_scanner* self) {
103
+ if (self->input) {
104
+ delete self->input;
105
+ }
106
+ free(self);
107
+ }
54
108
 
55
- #ifdef HAVE_ENDPOS_ARGUMENT
56
- #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
57
- (pattern->Match(text, startpos, endpos, anchor, match, nmatch))
58
- #else
59
- #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
60
- (pattern->Match(text, startpos, anchor, match, nmatch))
61
- #endif
109
+ void re2_regexp_free(re2_pattern* self) {
110
+ if (self->pattern) {
111
+ delete self->pattern;
112
+ }
113
+ free(self);
114
+ }
62
115
 
63
- typedef struct {
64
- RE2 *pattern;
65
- } re2_pattern;
116
+ static VALUE re2_matchdata_allocate(VALUE klass) {
117
+ re2_matchdata *m;
118
+ return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
119
+ re2_matchdata_free, m);
120
+ }
66
121
 
67
- typedef struct {
68
- re2::StringPiece *matches;
69
- int number_of_matches;
70
- VALUE regexp, text;
71
- } re2_matchdata;
122
+ static VALUE re2_scanner_allocate(VALUE klass) {
123
+ re2_scanner *c;
124
+ return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
125
+ re2_scanner_free, c);
126
+ }
127
+
128
+ /*
129
+ * Returns a frozen copy of the string passed into +match+.
130
+ *
131
+ * @return [String] a frozen copy of the passed string.
132
+ * @example
133
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
134
+ * m.string #=> "bob 123"
135
+ */
136
+ static VALUE re2_matchdata_string(VALUE self) {
137
+ re2_matchdata *m;
138
+ Data_Get_Struct(self, re2_matchdata, m);
72
139
 
73
- typedef struct {
74
- re2::StringPiece *input;
75
- int number_of_capturing_groups;
76
- VALUE regexp, text;
77
- } re2_scanner;
140
+ return m->text;
141
+ }
78
142
 
79
- VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner;
143
+ /*
144
+ * Returns the string passed into the scanner.
145
+ *
146
+ * @return [String] the original string.
147
+ * @example
148
+ * c = RE2::Regexp.new('(\d+)').scan("foo")
149
+ * c.string #=> "foo"
150
+ */
151
+ static VALUE re2_scanner_string(VALUE self) {
152
+ re2_scanner *c;
153
+ Data_Get_Struct(self, re2_scanner, c);
80
154
 
81
- /* Symbols used in RE2 options. */
82
- static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
83
- id_max_mem, id_literal, id_never_nl, id_case_sensitive,
84
- id_perl_classes, id_word_boundary, id_one_line;
155
+ return c->text;
156
+ }
85
157
 
86
- void re2_matchdata_mark(re2_matchdata* self) {
87
- rb_gc_mark(self->regexp);
88
- rb_gc_mark(self->text);
89
- }
158
+ /*
159
+ * Rewind the scanner to the start of the string.
160
+ *
161
+ * @example
162
+ * s = RE2::Regexp.new('(\d+)').scan("1 2 3")
163
+ * e = s.to_enum
164
+ * e.scan #=> ["1"]
165
+ * e.scan #=> ["2"]
166
+ * s.rewind
167
+ * e.scan #=> ["1"]
168
+ */
169
+ static VALUE re2_scanner_rewind(VALUE self) {
170
+ re2_scanner *c;
171
+ Data_Get_Struct(self, re2_scanner, c);
90
172
 
91
- void re2_matchdata_free(re2_matchdata* self) {
92
- if (self->matches) {
93
- delete[] self->matches;
94
- }
95
- free(self);
96
- }
173
+ c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text));
97
174
 
98
- void re2_scanner_mark(re2_scanner* self) {
99
- rb_gc_mark(self->regexp);
100
- rb_gc_mark(self->text);
101
- }
175
+ return self;
176
+ }
102
177
 
103
- void re2_scanner_free(re2_scanner* self) {
104
- if (self->input) {
105
- delete self->input;
106
- }
107
- free(self);
178
+ /*
179
+ * Scan the given text incrementally for matches, returning an array of
180
+ * matches on each subsequent call. Returns nil if no matches are found.
181
+ *
182
+ * @return [Array<String>] the matches.
183
+ * @example
184
+ * s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
185
+ * s.scan #=> ["Foo"]
186
+ * s.scan #=> ["bar"]
187
+ */
188
+ static VALUE re2_scanner_scan(VALUE self) {
189
+ int i;
190
+ re2_pattern *p;
191
+ re2_scanner *c;
192
+ VALUE result;
193
+
194
+ Data_Get_Struct(self, re2_scanner, c);
195
+ Data_Get_Struct(c->regexp, re2_pattern, p);
196
+
197
+ vector<RE2::Arg> argv(c->number_of_capturing_groups);
198
+ vector<RE2::Arg*> args(c->number_of_capturing_groups);
199
+ vector<string> matches(c->number_of_capturing_groups);
200
+
201
+ for (i = 0; i < c->number_of_capturing_groups; i++) {
202
+ matches[i] = "";
203
+ argv[i] = &matches[i];
204
+ args[i] = &argv[i];
108
205
  }
109
206
 
110
- void re2_regexp_free(re2_pattern* self) {
111
- if (self->pattern) {
112
- delete self->pattern;
207
+ if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0],
208
+ c->number_of_capturing_groups)) {
209
+ result = rb_ary_new2(c->number_of_capturing_groups);
210
+ for (i = 0; i < c->number_of_capturing_groups; i++) {
211
+ if (matches[i].empty()) {
212
+ rb_ary_push(result, Qnil);
213
+ } else {
214
+ rb_ary_push(result, ENCODED_STR_NEW(matches[i].data(),
215
+ matches[i].size(),
216
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"));
217
+ }
113
218
  }
114
- free(self);
219
+ } else {
220
+ result = Qnil;
115
221
  }
116
222
 
117
- static VALUE re2_matchdata_allocate(VALUE klass) {
118
- re2_matchdata *m;
119
- return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
120
- re2_matchdata_free, m);
121
- }
223
+ return result;
224
+ }
122
225
 
123
- static VALUE re2_scanner_allocate(VALUE klass) {
124
- re2_scanner *c;
125
- return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
126
- re2_scanner_free, c);
127
- }
226
+ /*
227
+ * Returns the number of elements in the match array (including nils).
228
+ *
229
+ * @return [Fixnum] the number of elements
230
+ * @example
231
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
232
+ * m.size #=> 2
233
+ * m.length #=> 2
234
+ */
235
+ static VALUE re2_matchdata_size(VALUE self) {
236
+ re2_matchdata *m;
237
+ Data_Get_Struct(self, re2_matchdata, m);
128
238
 
129
- /*
130
- * Returns a frozen copy of the string passed into +match+.
131
- *
132
- * @return [String] a frozen copy of the passed string.
133
- * @example
134
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
135
- * m.string #=> "bob 123"
136
- */
137
- static VALUE re2_matchdata_string(VALUE self) {
138
- re2_matchdata *m;
139
- Data_Get_Struct(self, re2_matchdata, m);
140
-
141
- return m->text;
142
- }
239
+ return INT2FIX(m->number_of_matches);
240
+ }
143
241
 
144
- /*
145
- * Returns the string passed into the scanner.
146
- *
147
- * @return [String] the original string.
148
- * @example
149
- * c = RE2::Regexp.new('(\d+)').scan("foo")
150
- * c.string #=> "foo"
151
- */
152
- static VALUE re2_scanner_string(VALUE self) {
153
- re2_scanner *c;
154
- Data_Get_Struct(self, re2_scanner, c);
155
-
156
- return c->text;
157
- }
242
+ /*
243
+ * Returns the {RE2::Regexp} used in the match.
244
+ *
245
+ * @return [RE2::Regexp] the regexp used in the match
246
+ * @example
247
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
248
+ * m.regexp #=> #<RE2::Regexp /(\d+)/>
249
+ */
250
+ static VALUE re2_matchdata_regexp(VALUE self) {
251
+ re2_matchdata *m;
252
+ Data_Get_Struct(self, re2_matchdata, m);
253
+ return m->regexp;
254
+ }
158
255
 
159
- /*
160
- * Rewind the scanner to the start of the string.
161
- *
162
- * @example
163
- * s = RE2::Regexp.new('(\d+)').scan("1 2 3")
164
- * e = s.to_enum
165
- * e.scan #=> ["1"]
166
- * e.scan #=> ["2"]
167
- * s.rewind
168
- * e.scan #=> ["1"]
169
- */
170
- static VALUE re2_scanner_rewind(VALUE self) {
171
- re2_scanner *c;
172
- Data_Get_Struct(self, re2_scanner, c);
173
-
174
- c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text));
175
-
176
- return self;
177
- }
256
+ /*
257
+ * Returns the {RE2::Regexp} used in the scanner.
258
+ *
259
+ * @return [RE2::Regexp] the regexp used in the scanner
260
+ * @example
261
+ * c = RE2::Regexp.new('(\d+)').scan("bob 123")
262
+ * c.regexp #=> #<RE2::Regexp /(\d+)/>
263
+ */
264
+ static VALUE re2_scanner_regexp(VALUE self) {
265
+ re2_scanner *c;
266
+ Data_Get_Struct(self, re2_scanner, c);
178
267
 
179
- /*
180
- * Scan the given text incrementally for matches, returning an array of
181
- * matches on each subsequent call. Returns nil if no matches are found.
182
- *
183
- * @return [Array<String>] the matches.
184
- * @example
185
- * s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
186
- * s.scan #=> ["Foo"]
187
- * s.scan #=> ["bar"]
188
- */
189
- static VALUE re2_scanner_scan(VALUE self) {
190
- int i;
191
- re2_pattern *p;
192
- re2_scanner *c;
193
- VALUE result;
194
-
195
- Data_Get_Struct(self, re2_scanner, c);
196
- Data_Get_Struct(c->regexp, re2_pattern, p);
197
-
198
- vector<RE2::Arg> argv(c->number_of_capturing_groups);
199
- vector<RE2::Arg*> args(c->number_of_capturing_groups);
200
- vector<string> matches(c->number_of_capturing_groups);
268
+ return c->regexp;
269
+ }
201
270
 
202
- for (i = 0; i < c->number_of_capturing_groups; i++) {
203
- matches[i] = "";
204
- argv[i] = &matches[i];
205
- args[i] = &argv[i];
206
- }
271
+ static VALUE re2_regexp_allocate(VALUE klass) {
272
+ re2_pattern *p;
273
+ return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
274
+ }
207
275
 
208
- if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0],
209
- c->number_of_capturing_groups)) {
210
- result = rb_ary_new2(c->number_of_capturing_groups);
211
- for (i = 0; i < c->number_of_capturing_groups; i++) {
212
- if (matches[i].empty()) {
213
- rb_ary_push(result, Qnil);
214
- } else {
215
- rb_ary_push(result, ENCODED_STR_NEW(matches[i].data(),
216
- matches[i].size(),
217
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"));
218
- }
219
- }
276
+ /*
277
+ * Returns the array of matches.
278
+ *
279
+ * @return [Array<String, nil>] the array of matches
280
+ * @example
281
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
282
+ * m.to_a #=> ["123", "123"]
283
+ */
284
+ static VALUE re2_matchdata_to_a(VALUE self) {
285
+ int i;
286
+ re2_matchdata *m;
287
+ re2_pattern *p;
288
+ re2::StringPiece *match;
289
+ VALUE array;
290
+
291
+ Data_Get_Struct(self, re2_matchdata, m);
292
+ Data_Get_Struct(m->regexp, re2_pattern, p);
293
+
294
+ array = rb_ary_new2(m->number_of_matches);
295
+ for (i = 0; i < m->number_of_matches; i++) {
296
+ match = &m->matches[i];
297
+
298
+ if (match->empty()) {
299
+ rb_ary_push(array, Qnil);
220
300
  } else {
221
- result = Qnil;
301
+ rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
302
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"));
222
303
  }
223
-
224
- return result;
225
304
  }
226
305
 
227
- /*
228
- * Returns the number of elements in the match array (including nils).
229
- *
230
- * @return [Fixnum] the number of elements
231
- * @example
232
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
233
- * m.size #=> 2
234
- * m.length #=> 2
235
- */
236
- static VALUE re2_matchdata_size(VALUE self) {
237
- re2_matchdata *m;
238
- Data_Get_Struct(self, re2_matchdata, m);
239
-
240
- return INT2FIX(m->number_of_matches);
241
- }
306
+ return array;
307
+ }
242
308
 
243
- /*
244
- * Returns the {RE2::Regexp} used in the match.
245
- *
246
- * @return [RE2::Regexp] the regexp used in the match
247
- * @example
248
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
249
- * m.regexp #=> #<RE2::Regexp /(\d+)/>
250
- */
251
- static VALUE re2_matchdata_regexp(VALUE self) {
252
- re2_matchdata *m;
253
- Data_Get_Struct(self, re2_matchdata, m);
254
- return m->regexp;
255
- }
309
+ static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
310
+ re2_matchdata *m;
311
+ re2_pattern *p;
312
+ re2::StringPiece *match;
256
313
 
257
- /*
258
- * Returns the {RE2::Regexp} used in the scanner.
259
- *
260
- * @return [RE2::Regexp] the regexp used in the scanner
261
- * @example
262
- * c = RE2::Regexp.new('(\d+)').scan("bob 123")
263
- * c.regexp #=> #<RE2::Regexp /(\d+)/>
264
- */
265
- static VALUE re2_scanner_regexp(VALUE self) {
266
- re2_scanner *c;
267
- Data_Get_Struct(self, re2_scanner, c);
268
-
269
- return c->regexp;
270
- }
314
+ Data_Get_Struct(self, re2_matchdata, m);
315
+ Data_Get_Struct(m->regexp, re2_pattern, p);
271
316
 
272
- static VALUE re2_regexp_allocate(VALUE klass) {
273
- re2_pattern *p;
274
- return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
275
- }
317
+ if (nth < 0 || nth >= m->number_of_matches) {
318
+ return Qnil;
319
+ } else {
320
+ match = &m->matches[nth];
276
321
 
277
- /*
278
- * Returns the array of matches.
279
- *
280
- * @return [Array<String, nil>] the array of matches
281
- * @example
282
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
283
- * m.to_a #=> ["123", "123"]
284
- */
285
- static VALUE re2_matchdata_to_a(VALUE self) {
286
- int i;
287
- re2_matchdata *m;
288
- re2_pattern *p;
289
- re2::StringPiece match;
290
- VALUE array;
291
-
292
- Data_Get_Struct(self, re2_matchdata, m);
293
- Data_Get_Struct(m->regexp, re2_pattern, p);
294
-
295
- array = rb_ary_new2(m->number_of_matches);
296
- for (i = 0; i < m->number_of_matches; i++) {
297
- if (m->matches[i].empty()) {
298
- rb_ary_push(array, Qnil);
299
- } else {
300
- match = m->matches[i];
301
- rb_ary_push(array, ENCODED_STR_NEW(match.data(), match.size(),
302
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"));
303
- }
322
+ if (match->empty()) {
323
+ return Qnil;
324
+ } else {
325
+ return ENCODED_STR_NEW(match->data(), match->size(),
326
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
304
327
  }
305
-
306
- return array;
307
328
  }
329
+ }
308
330
 
309
- static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
310
- re2_matchdata *m;
311
- re2_pattern *p;
312
- re2::StringPiece match;
331
+ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
332
+ int idx;
333
+ re2_matchdata *m;
334
+ re2_pattern *p;
335
+ map<string, int> groups;
336
+ string name_as_string(name);
313
337
 
314
- Data_Get_Struct(self, re2_matchdata, m);
315
- Data_Get_Struct(m->regexp, re2_pattern, p);
338
+ Data_Get_Struct(self, re2_matchdata, m);
339
+ Data_Get_Struct(m->regexp, re2_pattern, p);
316
340
 
317
- if (nth < 0 || nth >= m->number_of_matches) {
318
- return Qnil;
319
- } else {
320
- match = m->matches[nth];
341
+ groups = p->pattern->NamedCapturingGroups();
321
342
 
322
- if (match.empty()) {
323
- return Qnil;
324
- } else {
325
- return ENCODED_STR_NEW(match.data(), match.size(),
326
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
327
- }
328
- }
343
+ if (groups.count(name_as_string) == 1) {
344
+ idx = groups[name_as_string];
345
+ return re2_matchdata_nth_match(idx, self);
346
+ } else {
347
+ return Qnil;
329
348
  }
349
+ }
330
350
 
331
- static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
332
- int idx;
333
- re2_matchdata *m;
334
- re2_pattern *p;
335
- map<string, int> groups;
336
- string name_as_string(name);
351
+ /*
352
+ * Retrieve zero, one or more matches by index or name.
353
+ *
354
+ * @return [Array<String, nil>, String, Boolean]
355
+ *
356
+ * @overload [](index)
357
+ * Access a particular match by index.
358
+ *
359
+ * @param [Fixnum] index the index of the match to fetch
360
+ * @return [String, nil] the specified match
361
+ * @example
362
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
363
+ * m[0] #=> "123"
364
+ *
365
+ * @overload [](start, length)
366
+ * Access a range of matches by starting index and length.
367
+ *
368
+ * @param [Fixnum] start the index from which to start
369
+ * @param [Fixnum] length the number of elements to fetch
370
+ * @return [Array<String, nil>] the specified matches
371
+ * @example
372
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
373
+ * m[0, 1] #=> ["123"]
374
+ *
375
+ * @overload [](range)
376
+ * Access a range of matches by index.
377
+ *
378
+ * @param [Range] range the range of match indexes to fetch
379
+ * @return [Array<String, nil>] the specified matches
380
+ * @example
381
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
382
+ * m[0..1] #=> ["123", "123"]
383
+ *
384
+ * @overload [](name)
385
+ * Access a particular match by name.
386
+ *
387
+ * @param [String, Symbol] name the name of the match to fetch
388
+ * @return [String, nil] the specific match
389
+ * @example
390
+ * m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
391
+ * m["number"] #=> "123"
392
+ * m[:number] #=> "123"
393
+ */
394
+ static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
395
+ VALUE idx, rest;
396
+ rb_scan_args(argc, argv, "11", &idx, &rest);
397
+
398
+ if (TYPE(idx) == T_STRING) {
399
+ return re2_matchdata_named_match(StringValuePtr(idx), self);
400
+ } else if (SYMBOL_P(idx)) {
401
+ return re2_matchdata_named_match(rb_id2name(SYM2ID(idx)), self);
402
+ } else if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) {
403
+ return rb_ary_aref(argc, argv, re2_matchdata_to_a(self));
404
+ } else {
405
+ return re2_matchdata_nth_match(FIX2INT(idx), self);
406
+ }
407
+ }
337
408
 
338
- Data_Get_Struct(self, re2_matchdata, m);
339
- Data_Get_Struct(m->regexp, re2_pattern, p);
409
+ /*
410
+ * Returns the entire matched string.
411
+ *
412
+ * @return [String] the entire matched string
413
+ */
414
+ static VALUE re2_matchdata_to_s(VALUE self) {
415
+ return re2_matchdata_nth_match(0, self);
416
+ }
340
417
 
341
- groups = p->pattern->NamedCapturingGroups();
418
+ /*
419
+ * Returns a printable version of the match.
420
+ *
421
+ * @return [String] a printable version of the match
422
+ * @example
423
+ * m = RE2::Regexp.new('(\d+)').match("bob 123")
424
+ * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
425
+ */
426
+ static VALUE re2_matchdata_inspect(VALUE self) {
427
+ int i;
428
+ re2_matchdata *m;
429
+ re2_pattern *p;
430
+ VALUE match, result;
431
+ ostringstream output;
342
432
 
343
- if (groups.count(name_as_string) == 1) {
344
- idx = groups[name_as_string];
345
- return re2_matchdata_nth_match(idx, self);
346
- } else {
347
- return Qnil;
433
+ Data_Get_Struct(self, re2_matchdata, m);
434
+ Data_Get_Struct(m->regexp, re2_pattern, p);
435
+
436
+ output << "#<RE2::MatchData";
437
+
438
+ for (i = 0; i < m->number_of_matches; i++) {
439
+ output << " ";
440
+
441
+ if (i > 0) {
442
+ output << i << ":";
348
443
  }
349
- }
350
444
 
351
- /*
352
- * Retrieve zero, one or more matches by index or name.
353
- *
354
- * @return [Array<String, nil>, String, Boolean]
355
- *
356
- * @overload [](index)
357
- * Access a particular match by index.
358
- *
359
- * @param [Fixnum] index the index of the match to fetch
360
- * @return [String, nil] the specified match
361
- * @example
362
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
363
- * m[0] #=> "123"
364
- *
365
- * @overload [](start, length)
366
- * Access a range of matches by starting index and length.
367
- *
368
- * @param [Fixnum] start the index from which to start
369
- * @param [Fixnum] length the number of elements to fetch
370
- * @return [Array<String, nil>] the specified matches
371
- * @example
372
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
373
- * m[0, 1] #=> ["123"]
374
- *
375
- * @overload [](range)
376
- * Access a range of matches by index.
377
- *
378
- * @param [Range] range the range of match indexes to fetch
379
- * @return [Array<String, nil>] the specified matches
380
- * @example
381
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
382
- * m[0..1] #=> ["123", "123"]
383
- *
384
- * @overload [](name)
385
- * Access a particular match by name.
386
- *
387
- * @param [String, Symbol] name the name of the match to fetch
388
- * @return [String, nil] the specific match
389
- * @example
390
- * m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
391
- * m["number"] #=> "123"
392
- * m[:number] #=> "123"
393
- */
394
- static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
395
- VALUE idx, rest;
396
- rb_scan_args(argc, argv, "11", &idx, &rest);
397
-
398
- if (TYPE(idx) == T_STRING) {
399
- return re2_matchdata_named_match(StringValuePtr(idx), self);
400
- } else if (TYPE(idx) == T_SYMBOL) {
401
- return re2_matchdata_named_match(rb_id2name(SYM2ID(idx)), self);
402
- } else if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) {
403
- return rb_ary_aref(argc, argv, re2_matchdata_to_a(self));
445
+ match = re2_matchdata_nth_match(i, self);
446
+
447
+ if (match == Qnil) {
448
+ output << "nil";
404
449
  } else {
405
- return re2_matchdata_nth_match(FIX2INT(idx), self);
450
+ output << "\"" << StringValuePtr(match) << "\"";
406
451
  }
407
452
  }
408
453
 
409
- /*
410
- * Returns the entire matched string.
411
- *
412
- * @return [String] the entire matched string
413
- */
414
- static VALUE re2_matchdata_to_s(VALUE self) {
415
- return re2_matchdata_nth_match(0, self);
416
- }
454
+ output << ">";
417
455
 
418
- /*
419
- * Returns a printable version of the match.
420
- *
421
- * @return [String] a printable version of the match
422
- * @example
423
- * m = RE2::Regexp.new('(\d+)').match("bob 123")
424
- * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
425
- */
426
- static VALUE re2_matchdata_inspect(VALUE self) {
427
- int i;
428
- re2_matchdata *m;
429
- re2_pattern *p;
430
- VALUE match, result;
431
- ostringstream output;
432
-
433
- Data_Get_Struct(self, re2_matchdata, m);
434
- Data_Get_Struct(m->regexp, re2_pattern, p);
435
-
436
- output << "#<RE2::MatchData";
437
-
438
- for (i = 0; i < m->number_of_matches; i++) {
439
- output << " ";
440
-
441
- if (i > 0) {
442
- output << i << ":";
443
- }
456
+ result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
457
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
444
458
 
445
- match = re2_matchdata_nth_match(i, self);
459
+ return result;
460
+ }
446
461
 
447
- if (match == Qnil) {
448
- output << "nil";
449
- } else {
450
- output << "\"" << StringValuePtr(match) << "\"";
451
- }
462
+ /*
463
+ * Returns a new RE2 object with a compiled version of
464
+ * +pattern+ stored inside. Equivalent to +RE2::Regexp.new+.
465
+ *
466
+ * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
467
+ * @param [String] pattern the pattern to compile
468
+ * @param [Hash] options the options to compile a regexp with
469
+ * @see RE2::Regexp.new
470
+ *
471
+ */
472
+ static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
473
+ UNUSED(self);
474
+ return rb_class_new_instance(argc, argv, re2_cRegexp);
475
+ }
476
+
477
+ /*
478
+ * Returns a new {RE2::Regexp} object with a compiled version of
479
+ * +pattern+ stored inside.
480
+ *
481
+ * @return [RE2::Regexp]
482
+ *
483
+ * @overload initialize(pattern)
484
+ * Returns a new {RE2::Regexp} object with a compiled version of
485
+ * +pattern+ stored inside with the default options.
486
+ *
487
+ * @param [String] pattern the pattern to compile
488
+ * @return [RE2::Regexp] an RE2::Regexp with the specified pattern
489
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled
490
+ * pattern
491
+ *
492
+ * @overload initialize(pattern, options)
493
+ * Returns a new {RE2::Regexp} object with a compiled version of
494
+ * +pattern+ stored inside with the specified options.
495
+ *
496
+ * @param [String] pattern the pattern to compile
497
+ * @param [Hash] options the options with which to compile the pattern
498
+ * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
499
+ * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
500
+ * @option options [Boolean] :longest_match (false) search for longest match, not first match
501
+ * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
502
+ * @option options [Fixnum] :max_mem approx. max memory footprint of RE2
503
+ * @option options [Boolean] :literal (false) interpret string as literal, not regexp
504
+ * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
505
+ * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
506
+ * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
507
+ * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
508
+ * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
509
+ * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
510
+ * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
511
+ */
512
+ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
513
+ VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors,
514
+ max_mem, literal, never_nl, case_sensitive, perl_classes,
515
+ word_boundary, one_line;
516
+ re2_pattern *p;
517
+
518
+ rb_scan_args(argc, argv, "11", &pattern, &options);
519
+ Data_Get_Struct(self, re2_pattern, p);
520
+
521
+ if (RTEST(options)) {
522
+ if (TYPE(options) != T_HASH) {
523
+ rb_raise(rb_eArgError, "options should be a hash");
452
524
  }
453
525
 
454
- output << ">";
526
+ RE2::Options re2_options;
455
527
 
456
- result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
457
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
528
+ utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
529
+ if (!NIL_P(utf8)) {
530
+ re2_options.set_utf8(RTEST(utf8));
531
+ }
458
532
 
459
- return result;
460
- }
533
+ posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
534
+ if (!NIL_P(posix_syntax)) {
535
+ re2_options.set_posix_syntax(RTEST(posix_syntax));
536
+ }
461
537
 
462
- /*
463
- * Returns a new RE2 object with a compiled version of
464
- * +pattern+ stored inside. Equivalent to +RE2.new+.
465
- *
466
- * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
467
- * @param [String] pattern the pattern to compile
468
- * @param [Hash] options the options to compile a regexp with
469
- * @see RE2::Regexp.new
470
- *
471
- */
472
- static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
473
- UNUSED(self);
474
- return rb_class_new_instance(argc, argv, re2_cRegexp);
475
- }
538
+ longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
539
+ if (!NIL_P(longest_match)) {
540
+ re2_options.set_longest_match(RTEST(longest_match));
541
+ }
476
542
 
477
- /*
478
- * Returns a new {RE2::Regexp} object with a compiled version of
479
- * +pattern+ stored inside.
480
- *
481
- * @return [RE2::Regexp]
482
- *
483
- * @overload initialize(pattern)
484
- * Returns a new {RE2::Regexp} object with a compiled version of
485
- * +pattern+ stored inside with the default options.
486
- *
487
- * @param [String] pattern the pattern to compile
488
- * @return [RE2::Regexp] an RE2::Regexp with the specified pattern
489
- * @raise [NoMemoryError] if memory could not be allocated for the compiled
490
- * pattern
491
- *
492
- * @overload initialize(pattern, options)
493
- * Returns a new {RE2::Regexp} object with a compiled version of
494
- * +pattern+ stored inside with the specified options.
495
- *
496
- * @param [String] pattern the pattern to compile
497
- * @param [Hash] options the options with which to compile the pattern
498
- * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
499
- * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
500
- * @option options [Boolean] :longest_match (false) search for longest match, not first match
501
- * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
502
- * @option options [Fixnum] :max_mem approx. max memory footprint of RE2
503
- * @option options [Boolean] :literal (false) interpret string as literal, not regexp
504
- * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
505
- * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
506
- * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
507
- * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
508
- * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
509
- * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
510
- * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
511
- */
512
- static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
513
- VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors,
514
- max_mem, literal, never_nl, case_sensitive, perl_classes,
515
- word_boundary, one_line;
516
- re2_pattern *p;
517
-
518
- rb_scan_args(argc, argv, "11", &pattern, &options);
519
- Data_Get_Struct(self, re2_pattern, p);
520
-
521
- if (RTEST(options)) {
522
- if (TYPE(options) != T_HASH) {
523
- rb_raise(rb_eArgError, "options should be a hash");
524
- }
543
+ log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
544
+ if (!NIL_P(log_errors)) {
545
+ re2_options.set_log_errors(RTEST(log_errors));
546
+ }
525
547
 
526
- RE2::Options re2_options;
548
+ max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
549
+ if (!NIL_P(max_mem)) {
550
+ re2_options.set_max_mem(NUM2INT(max_mem));
551
+ }
527
552
 
528
- utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
529
- if (!NIL_P(utf8)) {
530
- re2_options.set_utf8(RTEST(utf8));
531
- }
553
+ literal = rb_hash_aref(options, ID2SYM(id_literal));
554
+ if (!NIL_P(literal)) {
555
+ re2_options.set_literal(RTEST(literal));
556
+ }
532
557
 
533
- posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
534
- if (!NIL_P(posix_syntax)) {
535
- re2_options.set_posix_syntax(RTEST(posix_syntax));
536
- }
558
+ never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
559
+ if (!NIL_P(never_nl)) {
560
+ re2_options.set_never_nl(RTEST(never_nl));
561
+ }
537
562
 
538
- longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
539
- if (!NIL_P(longest_match)) {
540
- re2_options.set_longest_match(RTEST(longest_match));
541
- }
563
+ case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
564
+ if (!NIL_P(case_sensitive)) {
565
+ re2_options.set_case_sensitive(RTEST(case_sensitive));
566
+ }
542
567
 
543
- log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
544
- if (!NIL_P(log_errors)) {
545
- re2_options.set_log_errors(RTEST(log_errors));
546
- }
568
+ perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
569
+ if (!NIL_P(perl_classes)) {
570
+ re2_options.set_perl_classes(RTEST(perl_classes));
571
+ }
547
572
 
548
- max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
549
- if (!NIL_P(max_mem)) {
550
- re2_options.set_max_mem(NUM2INT(max_mem));
551
- }
573
+ word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
574
+ if (!NIL_P(word_boundary)) {
575
+ re2_options.set_word_boundary(RTEST(word_boundary));
576
+ }
552
577
 
553
- literal = rb_hash_aref(options, ID2SYM(id_literal));
554
- if (!NIL_P(literal)) {
555
- re2_options.set_literal(RTEST(literal));
556
- }
578
+ one_line = rb_hash_aref(options, ID2SYM(id_one_line));
579
+ if (!NIL_P(one_line)) {
580
+ re2_options.set_one_line(RTEST(one_line));
581
+ }
557
582
 
558
- never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
559
- if (!NIL_P(never_nl)) {
560
- re2_options.set_never_nl(RTEST(never_nl));
561
- }
583
+ p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
584
+ } else {
585
+ p->pattern = new(nothrow) RE2(StringValuePtr(pattern));
586
+ }
562
587
 
563
- case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
564
- if (!NIL_P(case_sensitive)) {
565
- re2_options.set_case_sensitive(RTEST(case_sensitive));
566
- }
588
+ if (p->pattern == 0) {
589
+ rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
590
+ }
567
591
 
568
- perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
569
- if (!NIL_P(perl_classes)) {
570
- re2_options.set_perl_classes(RTEST(perl_classes));
571
- }
592
+ return self;
593
+ }
572
594
 
573
- word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
574
- if (!NIL_P(word_boundary)) {
575
- re2_options.set_word_boundary(RTEST(word_boundary));
576
- }
595
+ /*
596
+ * Returns a printable version of the regular expression +re2+.
597
+ *
598
+ * @return [String] a printable version of the regular expression
599
+ * @example
600
+ * re2 = RE2::Regexp.new("woo?")
601
+ * re2.inspect #=> "#<RE2::Regexp /woo?/>"
602
+ */
603
+ static VALUE re2_regexp_inspect(VALUE self) {
604
+ re2_pattern *p;
605
+ VALUE result;
606
+ ostringstream output;
577
607
 
578
- one_line = rb_hash_aref(options, ID2SYM(id_one_line));
579
- if (!NIL_P(one_line)) {
580
- re2_options.set_one_line(RTEST(one_line));
581
- }
608
+ Data_Get_Struct(self, re2_pattern, p);
582
609
 
583
- p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
584
- } else {
585
- p->pattern = new(nothrow) RE2(StringValuePtr(pattern));
586
- }
610
+ output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
587
611
 
588
- if (p->pattern == 0) {
589
- rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
590
- }
612
+ result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
613
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
591
614
 
592
- return self;
593
- }
615
+ return result;
616
+ }
617
+
618
+ /*
619
+ * Returns a string version of the regular expression +re2+.
620
+ *
621
+ * @return [String] a string version of the regular expression
622
+ * @example
623
+ * re2 = RE2::Regexp.new("woo?")
624
+ * re2.to_s #=> "woo?"
625
+ */
626
+ static VALUE re2_regexp_to_s(VALUE self) {
627
+ re2_pattern *p;
628
+ Data_Get_Struct(self, re2_pattern, p);
629
+ return ENCODED_STR_NEW(p->pattern->pattern().data(),
630
+ p->pattern->pattern().size(),
631
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
632
+ }
594
633
 
595
- /*
596
- * Returns a printable version of the regular expression +re2+.
597
- *
598
- * @return [String] a printable version of the regular expression
599
- * @example
600
- * re2 = RE2::Regexp.new("woo?")
601
- * re2.inspect #=> "#<RE2::Regexp /woo?/>"
602
- */
603
- static VALUE re2_regexp_inspect(VALUE self) {
604
- re2_pattern *p;
605
- VALUE result;
606
- ostringstream output;
634
+ /*
635
+ * Returns whether or not the regular expression +re2+
636
+ * was compiled successfully or not.
637
+ *
638
+ * @return [Boolean] whether or not compilation was successful
639
+ * @example
640
+ * re2 = RE2::Regexp.new("woo?")
641
+ * re2.ok? #=> true
642
+ */
643
+ static VALUE re2_regexp_ok(VALUE self) {
644
+ re2_pattern *p;
645
+ Data_Get_Struct(self, re2_pattern, p);
646
+ return BOOL2RUBY(p->pattern->ok());
647
+ }
607
648
 
608
- Data_Get_Struct(self, re2_pattern, p);
649
+ /*
650
+ * Returns whether or not the regular expression +re2+
651
+ * was compiled with the utf8 option set to true.
652
+ *
653
+ * @return [Boolean] the utf8 option
654
+ * @example
655
+ * re2 = RE2::Regexp.new("woo?", :utf8 => true)
656
+ * re2.utf8? #=> true
657
+ */
658
+ static VALUE re2_regexp_utf8(VALUE self) {
659
+ re2_pattern *p;
660
+ Data_Get_Struct(self, re2_pattern, p);
661
+ return BOOL2RUBY(p->pattern->options().utf8());
662
+ }
609
663
 
610
- output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
664
+ /*
665
+ * Returns whether or not the regular expression +re2+
666
+ * was compiled with the posix_syntax option set to true.
667
+ *
668
+ * @return [Boolean] the posix_syntax option
669
+ * @example
670
+ * re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
671
+ * re2.posix_syntax? #=> true
672
+ */
673
+ static VALUE re2_regexp_posix_syntax(VALUE self) {
674
+ re2_pattern *p;
675
+ Data_Get_Struct(self, re2_pattern, p);
676
+ return BOOL2RUBY(p->pattern->options().posix_syntax());
677
+ }
611
678
 
612
- result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
613
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
679
+ /*
680
+ * Returns whether or not the regular expression +re2+
681
+ * was compiled with the longest_match option set to true.
682
+ *
683
+ * @return [Boolean] the longest_match option
684
+ * @example
685
+ * re2 = RE2::Regexp.new("woo?", :longest_match => true)
686
+ * re2.longest_match? #=> true
687
+ */
688
+ static VALUE re2_regexp_longest_match(VALUE self) {
689
+ re2_pattern *p;
690
+ Data_Get_Struct(self, re2_pattern, p);
691
+ return BOOL2RUBY(p->pattern->options().longest_match());
692
+ }
614
693
 
615
- return result;
616
- }
694
+ /*
695
+ * Returns whether or not the regular expression +re2+
696
+ * was compiled with the log_errors option set to true.
697
+ *
698
+ * @return [Boolean] the log_errors option
699
+ * @example
700
+ * re2 = RE2::Regexp.new("woo?", :log_errors => true)
701
+ * re2.log_errors? #=> true
702
+ */
703
+ static VALUE re2_regexp_log_errors(VALUE self) {
704
+ re2_pattern *p;
705
+ Data_Get_Struct(self, re2_pattern, p);
706
+ return BOOL2RUBY(p->pattern->options().log_errors());
707
+ }
617
708
 
618
- /*
619
- * Returns a string version of the regular expression +re2+.
620
- *
621
- * @return [String] a string version of the regular expression
622
- * @example
623
- * re2 = RE2::Regexp.new("woo?")
624
- * re2.to_s #=> "woo?"
625
- */
626
- static VALUE re2_regexp_to_s(VALUE self) {
627
- re2_pattern *p;
628
- Data_Get_Struct(self, re2_pattern, p);
629
- return ENCODED_STR_NEW(p->pattern->pattern().data(),
630
- p->pattern->pattern().size(),
631
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
632
- }
709
+ /*
710
+ * Returns the max_mem setting for the regular expression
711
+ * +re2+.
712
+ *
713
+ * @return [Fixnum] the max_mem option
714
+ * @example
715
+ * re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
716
+ * re2.max_mem #=> 1024
717
+ */
718
+ static VALUE re2_regexp_max_mem(VALUE self) {
719
+ re2_pattern *p;
720
+ Data_Get_Struct(self, re2_pattern, p);
721
+ return INT2FIX(p->pattern->options().max_mem());
722
+ }
633
723
 
634
- /*
635
- * Returns whether or not the regular expression +re2+
636
- * was compiled successfully or not.
637
- *
638
- * @return [Boolean] whether or not compilation was successful
639
- * @example
640
- * re2 = RE2::Regexp.new("woo?")
641
- * re2.ok? #=> true
642
- */
643
- static VALUE re2_regexp_ok(VALUE self) {
644
- re2_pattern *p;
645
- Data_Get_Struct(self, re2_pattern, p);
646
- return BOOL2RUBY(p->pattern->ok());
647
- }
724
+ /*
725
+ * Returns whether or not the regular expression +re2+
726
+ * was compiled with the literal option set to true.
727
+ *
728
+ * @return [Boolean] the literal option
729
+ * @example
730
+ * re2 = RE2::Regexp.new("woo?", :literal => true)
731
+ * re2.literal? #=> true
732
+ */
733
+ static VALUE re2_regexp_literal(VALUE self) {
734
+ re2_pattern *p;
735
+ Data_Get_Struct(self, re2_pattern, p);
736
+ return BOOL2RUBY(p->pattern->options().literal());
737
+ }
648
738
 
649
- /*
650
- * Returns whether or not the regular expression +re2+
651
- * was compiled with the utf8 option set to true.
652
- *
653
- * @return [Boolean] the utf8 option
654
- * @example
655
- * re2 = RE2::Regexp.new("woo?", :utf8 => true)
656
- * re2.utf8? #=> true
657
- */
658
- static VALUE re2_regexp_utf8(VALUE self) {
659
- re2_pattern *p;
660
- Data_Get_Struct(self, re2_pattern, p);
661
- return BOOL2RUBY(p->pattern->options().utf8());
662
- }
739
+ /*
740
+ * Returns whether or not the regular expression +re2+
741
+ * was compiled with the never_nl option set to true.
742
+ *
743
+ * @return [Boolean] the never_nl option
744
+ * @example
745
+ * re2 = RE2::Regexp.new("woo?", :never_nl => true)
746
+ * re2.never_nl? #=> true
747
+ */
748
+ static VALUE re2_regexp_never_nl(VALUE self) {
749
+ re2_pattern *p;
750
+ Data_Get_Struct(self, re2_pattern, p);
751
+ return BOOL2RUBY(p->pattern->options().never_nl());
752
+ }
663
753
 
664
- /*
665
- * Returns whether or not the regular expression +re2+
666
- * was compiled with the posix_syntax option set to true.
667
- *
668
- * @return [Boolean] the posix_syntax option
669
- * @example
670
- * re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
671
- * re2.posix_syntax? #=> true
672
- */
673
- static VALUE re2_regexp_posix_syntax(VALUE self) {
674
- re2_pattern *p;
675
- Data_Get_Struct(self, re2_pattern, p);
676
- return BOOL2RUBY(p->pattern->options().posix_syntax());
677
- }
754
+ /*
755
+ * Returns whether or not the regular expression +re2+
756
+ * was compiled with the case_sensitive option set to true.
757
+ *
758
+ * @return [Boolean] the case_sensitive option
759
+ * @example
760
+ * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
761
+ * re2.case_sensitive? #=> true
762
+ */
763
+ static VALUE re2_regexp_case_sensitive(VALUE self) {
764
+ re2_pattern *p;
765
+ Data_Get_Struct(self, re2_pattern, p);
766
+ return BOOL2RUBY(p->pattern->options().case_sensitive());
767
+ }
678
768
 
679
- /*
680
- * Returns whether or not the regular expression +re2+
681
- * was compiled with the longest_match option set to true.
682
- *
683
- * @return [Boolean] the longest_match option
684
- * @example
685
- * re2 = RE2::Regexp.new("woo?", :longest_match => true)
686
- * re2.longest_match? #=> true
687
- */
688
- static VALUE re2_regexp_longest_match(VALUE self) {
689
- re2_pattern *p;
690
- Data_Get_Struct(self, re2_pattern, p);
691
- return BOOL2RUBY(p->pattern->options().longest_match());
692
- }
769
+ /*
770
+ * Returns whether or not the regular expression +re2+
771
+ * was compiled with the case_sensitive option set to false.
772
+ *
773
+ * @return [Boolean] the inverse of the case_sensitive option
774
+ * @example
775
+ * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
776
+ * re2.case_insensitive? #=> false
777
+ * re2.casefold? #=> false
778
+ */
779
+ static VALUE re2_regexp_case_insensitive(VALUE self) {
780
+ return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
781
+ }
693
782
 
694
- /*
695
- * Returns whether or not the regular expression +re2+
696
- * was compiled with the log_errors option set to true.
697
- *
698
- * @return [Boolean] the log_errors option
699
- * @example
700
- * re2 = RE2::Regexp.new("woo?", :log_errors => true)
701
- * re2.log_errors? #=> true
702
- */
703
- static VALUE re2_regexp_log_errors(VALUE self) {
704
- re2_pattern *p;
705
- Data_Get_Struct(self, re2_pattern, p);
706
- return BOOL2RUBY(p->pattern->options().log_errors());
707
- }
783
+ /*
784
+ * Returns whether or not the regular expression +re2+
785
+ * was compiled with the perl_classes option set to true.
786
+ *
787
+ * @return [Boolean] the perl_classes option
788
+ * @example
789
+ * re2 = RE2::Regexp.new("woo?", :perl_classes => true)
790
+ * re2.perl_classes? #=> true
791
+ */
792
+ static VALUE re2_regexp_perl_classes(VALUE self) {
793
+ re2_pattern *p;
794
+ Data_Get_Struct(self, re2_pattern, p);
795
+ return BOOL2RUBY(p->pattern->options().perl_classes());
796
+ }
708
797
 
709
- /*
710
- * Returns the max_mem setting for the regular expression
711
- * +re2+.
712
- *
713
- * @return [Fixnum] the max_mem option
714
- * @example
715
- * re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
716
- * re2.max_mem #=> 1024
717
- */
718
- static VALUE re2_regexp_max_mem(VALUE self) {
719
- re2_pattern *p;
720
- Data_Get_Struct(self, re2_pattern, p);
721
- return INT2FIX(p->pattern->options().max_mem());
722
- }
798
+ /*
799
+ * Returns whether or not the regular expression +re2+
800
+ * was compiled with the word_boundary option set to true.
801
+ *
802
+ * @return [Boolean] the word_boundary option
803
+ * @example
804
+ * re2 = RE2::Regexp.new("woo?", :word_boundary => true)
805
+ * re2.word_boundary? #=> true
806
+ */
807
+ static VALUE re2_regexp_word_boundary(VALUE self) {
808
+ re2_pattern *p;
809
+ Data_Get_Struct(self, re2_pattern, p);
810
+ return BOOL2RUBY(p->pattern->options().word_boundary());
811
+ }
723
812
 
724
- /*
725
- * Returns whether or not the regular expression +re2+
726
- * was compiled with the literal option set to true.
727
- *
728
- * @return [Boolean] the literal option
729
- * @example
730
- * re2 = RE2::Regexp.new("woo?", :literal => true)
731
- * re2.literal? #=> true
732
- */
733
- static VALUE re2_regexp_literal(VALUE self) {
734
- re2_pattern *p;
735
- Data_Get_Struct(self, re2_pattern, p);
736
- return BOOL2RUBY(p->pattern->options().literal());
737
- }
813
+ /*
814
+ * Returns whether or not the regular expression +re2+
815
+ * was compiled with the one_line option set to true.
816
+ *
817
+ * @return [Boolean] the one_line option
818
+ * @example
819
+ * re2 = RE2::Regexp.new("woo?", :one_line => true)
820
+ * re2.one_line? #=> true
821
+ */
822
+ static VALUE re2_regexp_one_line(VALUE self) {
823
+ re2_pattern *p;
824
+ Data_Get_Struct(self, re2_pattern, p);
825
+ return BOOL2RUBY(p->pattern->options().one_line());
826
+ }
738
827
 
739
- /*
740
- * Returns whether or not the regular expression +re2+
741
- * was compiled with the never_nl option set to true.
742
- *
743
- * @return [Boolean] the never_nl option
744
- * @example
745
- * re2 = RE2::Regexp.new("woo?", :never_nl => true)
746
- * re2.never_nl? #=> true
747
- */
748
- static VALUE re2_regexp_never_nl(VALUE self) {
749
- re2_pattern *p;
750
- Data_Get_Struct(self, re2_pattern, p);
751
- return BOOL2RUBY(p->pattern->options().never_nl());
828
+ /*
829
+ * If the RE2 could not be created properly, returns an
830
+ * error string otherwise returns nil.
831
+ *
832
+ * @return [String, nil] the error string or nil
833
+ */
834
+ static VALUE re2_regexp_error(VALUE self) {
835
+ re2_pattern *p;
836
+ Data_Get_Struct(self, re2_pattern, p);
837
+ if (p->pattern->ok()) {
838
+ return Qnil;
839
+ } else {
840
+ return rb_str_new(p->pattern->error().data(), p->pattern->error().size());
752
841
  }
842
+ }
753
843
 
754
- /*
755
- * Returns whether or not the regular expression +re2+
756
- * was compiled with the case_sensitive option set to true.
757
- *
758
- * @return [Boolean] the case_sensitive option
759
- * @example
760
- * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
761
- * re2.case_sensitive? #=> true
762
- */
763
- static VALUE re2_regexp_case_sensitive(VALUE self) {
764
- re2_pattern *p;
765
- Data_Get_Struct(self, re2_pattern, p);
766
- return BOOL2RUBY(p->pattern->options().case_sensitive());
844
+ /*
845
+ * If the RE2 could not be created properly, returns
846
+ * the offending portion of the regexp otherwise returns nil.
847
+ *
848
+ * @return [String, nil] the offending portion of the regexp or nil
849
+ */
850
+ static VALUE re2_regexp_error_arg(VALUE self) {
851
+ re2_pattern *p;
852
+ Data_Get_Struct(self, re2_pattern, p);
853
+ if (p->pattern->ok()) {
854
+ return Qnil;
855
+ } else {
856
+ return ENCODED_STR_NEW(p->pattern->error_arg().data(),
857
+ p->pattern->error_arg().size(),
858
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
767
859
  }
860
+ }
768
861
 
769
- /*
770
- * Returns whether or not the regular expression +re2+
771
- * was compiled with the case_sensitive option set to false.
772
- *
773
- * @return [Boolean] the inverse of the case_sensitive option
774
- * @example
775
- * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
776
- * re2.case_insensitive? #=> false
777
- * re2.casefold? #=> false
778
- */
779
- static VALUE re2_regexp_case_insensitive(VALUE self) {
780
- return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
781
- }
862
+ /*
863
+ * Returns the program size, a very approximate measure
864
+ * of a regexp's "cost". Larger numbers are more expensive
865
+ * than smaller numbers.
866
+ *
867
+ * @return [Fixnum] the regexp "cost"
868
+ */
869
+ static VALUE re2_regexp_program_size(VALUE self) {
870
+ re2_pattern *p;
871
+ Data_Get_Struct(self, re2_pattern, p);
872
+ return INT2FIX(p->pattern->ProgramSize());
873
+ }
782
874
 
783
- /*
784
- * Returns whether or not the regular expression +re2+
785
- * was compiled with the perl_classes option set to true.
786
- *
787
- * @return [Boolean] the perl_classes option
788
- * @example
789
- * re2 = RE2::Regexp.new("woo?", :perl_classes => true)
790
- * re2.perl_classes? #=> true
791
- */
792
- static VALUE re2_regexp_perl_classes(VALUE self) {
793
- re2_pattern *p;
794
- Data_Get_Struct(self, re2_pattern, p);
795
- return BOOL2RUBY(p->pattern->options().perl_classes());
796
- }
875
+ /*
876
+ * Returns a hash of the options currently set for
877
+ * +re2+.
878
+ *
879
+ * @return [Hash] the options
880
+ */
881
+ static VALUE re2_regexp_options(VALUE self) {
882
+ VALUE options;
883
+ re2_pattern *p;
797
884
 
798
- /*
799
- * Returns whether or not the regular expression +re2+
800
- * was compiled with the word_boundary option set to true.
801
- *
802
- * @return [Boolean] the word_boundary option
803
- * @example
804
- * re2 = RE2::Regexp.new("woo?", :word_boundary => true)
805
- * re2.word_boundary? #=> true
806
- */
807
- static VALUE re2_regexp_word_boundary(VALUE self) {
808
- re2_pattern *p;
809
- Data_Get_Struct(self, re2_pattern, p);
810
- return BOOL2RUBY(p->pattern->options().word_boundary());
811
- }
885
+ Data_Get_Struct(self, re2_pattern, p);
886
+ options = rb_hash_new();
812
887
 
813
- /*
814
- * Returns whether or not the regular expression +re2+
815
- * was compiled with the one_line option set to true.
816
- *
817
- * @return [Boolean] the one_line option
818
- * @example
819
- * re2 = RE2::Regexp.new("woo?", :one_line => true)
820
- * re2.one_line? #=> true
821
- */
822
- static VALUE re2_regexp_one_line(VALUE self) {
823
- re2_pattern *p;
824
- Data_Get_Struct(self, re2_pattern, p);
825
- return BOOL2RUBY(p->pattern->options().one_line());
826
- }
888
+ rb_hash_aset(options, ID2SYM(id_utf8),
889
+ BOOL2RUBY(p->pattern->options().utf8()));
827
890
 
828
- /*
829
- * If the RE2 could not be created properly, returns an
830
- * error string otherwise returns nil.
831
- *
832
- * @return [String, nil] the error string or nil
833
- */
834
- static VALUE re2_regexp_error(VALUE self) {
835
- re2_pattern *p;
836
- Data_Get_Struct(self, re2_pattern, p);
837
- if (p->pattern->ok()) {
838
- return Qnil;
839
- } else {
840
- return rb_str_new(p->pattern->error().data(), p->pattern->error().size());
841
- }
842
- }
891
+ rb_hash_aset(options, ID2SYM(id_posix_syntax),
892
+ BOOL2RUBY(p->pattern->options().posix_syntax()));
843
893
 
844
- /*
845
- * If the RE2 could not be created properly, returns
846
- * the offending portion of the regexp otherwise returns nil.
847
- *
848
- * @return [String, nil] the offending portion of the regexp or nil
849
- */
850
- static VALUE re2_regexp_error_arg(VALUE self) {
851
- re2_pattern *p;
852
- Data_Get_Struct(self, re2_pattern, p);
853
- if (p->pattern->ok()) {
854
- return Qnil;
855
- } else {
856
- return ENCODED_STR_NEW(p->pattern->error_arg().data(),
857
- p->pattern->error_arg().size(),
858
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
859
- }
860
- }
894
+ rb_hash_aset(options, ID2SYM(id_longest_match),
895
+ BOOL2RUBY(p->pattern->options().longest_match()));
861
896
 
862
- /*
863
- * Returns the program size, a very approximate measure
864
- * of a regexp's "cost". Larger numbers are more expensive
865
- * than smaller numbers.
866
- *
867
- * @return [Fixnum] the regexp "cost"
868
- */
869
- static VALUE re2_regexp_program_size(VALUE self) {
870
- re2_pattern *p;
871
- Data_Get_Struct(self, re2_pattern, p);
872
- return INT2FIX(p->pattern->ProgramSize());
873
- }
897
+ rb_hash_aset(options, ID2SYM(id_log_errors),
898
+ BOOL2RUBY(p->pattern->options().log_errors()));
899
+
900
+ rb_hash_aset(options, ID2SYM(id_max_mem),
901
+ INT2FIX(p->pattern->options().max_mem()));
874
902
 
875
- /*
876
- * Returns a hash of the options currently set for
877
- * +re2+.
878
- *
879
- * @return [Hash] the options
880
- */
881
- static VALUE re2_regexp_options(VALUE self) {
882
- VALUE options;
883
- re2_pattern *p;
903
+ rb_hash_aset(options, ID2SYM(id_literal),
904
+ BOOL2RUBY(p->pattern->options().literal()));
884
905
 
885
- Data_Get_Struct(self, re2_pattern, p);
886
- options = rb_hash_new();
906
+ rb_hash_aset(options, ID2SYM(id_never_nl),
907
+ BOOL2RUBY(p->pattern->options().never_nl()));
887
908
 
888
- rb_hash_aset(options, ID2SYM(id_utf8),
889
- BOOL2RUBY(p->pattern->options().utf8()));
909
+ rb_hash_aset(options, ID2SYM(id_case_sensitive),
910
+ BOOL2RUBY(p->pattern->options().case_sensitive()));
890
911
 
891
- rb_hash_aset(options, ID2SYM(id_posix_syntax),
892
- BOOL2RUBY(p->pattern->options().posix_syntax()));
912
+ rb_hash_aset(options, ID2SYM(id_perl_classes),
913
+ BOOL2RUBY(p->pattern->options().perl_classes()));
893
914
 
894
- rb_hash_aset(options, ID2SYM(id_longest_match),
895
- BOOL2RUBY(p->pattern->options().longest_match()));
915
+ rb_hash_aset(options, ID2SYM(id_word_boundary),
916
+ BOOL2RUBY(p->pattern->options().word_boundary()));
896
917
 
897
- rb_hash_aset(options, ID2SYM(id_log_errors),
898
- BOOL2RUBY(p->pattern->options().log_errors()));
918
+ rb_hash_aset(options, ID2SYM(id_one_line),
919
+ BOOL2RUBY(p->pattern->options().one_line()));
899
920
 
900
- rb_hash_aset(options, ID2SYM(id_max_mem),
901
- INT2FIX(p->pattern->options().max_mem()));
921
+ /* This is a read-only hash after all... */
922
+ rb_obj_freeze(options);
923
+
924
+ return options;
925
+ }
902
926
 
903
- rb_hash_aset(options, ID2SYM(id_literal),
904
- BOOL2RUBY(p->pattern->options().literal()));
927
+ /*
928
+ * Returns the number of capturing subpatterns, or -1 if the regexp
929
+ * wasn't valid on construction. The overall match ($0) does not
930
+ * count: if the regexp is "(a)(b)", returns 2.
931
+ *
932
+ * @return [Fixnum] the number of capturing subpatterns
933
+ */
934
+ static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
935
+ re2_pattern *p;
905
936
 
906
- rb_hash_aset(options, ID2SYM(id_never_nl),
907
- BOOL2RUBY(p->pattern->options().never_nl()));
937
+ Data_Get_Struct(self, re2_pattern, p);
938
+ return INT2FIX(p->pattern->NumberOfCapturingGroups());
939
+ }
908
940
 
909
- rb_hash_aset(options, ID2SYM(id_case_sensitive),
910
- BOOL2RUBY(p->pattern->options().case_sensitive()));
941
+ /*
942
+ * Returns a hash of names to capturing indices of groups.
943
+ *
944
+ * @return [Hash] a hash of names to capturing indices
945
+ */
946
+ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
947
+ VALUE capturing_groups;
948
+ re2_pattern *p;
949
+ map<string, int> groups;
950
+ map<string, int>::iterator iterator;
951
+
952
+ Data_Get_Struct(self, re2_pattern, p);
953
+ groups = p->pattern->NamedCapturingGroups();
954
+ capturing_groups = rb_hash_new();
955
+
956
+ for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
957
+ rb_hash_aset(capturing_groups,
958
+ ENCODED_STR_NEW(iterator->first.data(), iterator->first.size(),
959
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"),
960
+ INT2FIX(iterator->second));
961
+ }
911
962
 
912
- rb_hash_aset(options, ID2SYM(id_perl_classes),
913
- BOOL2RUBY(p->pattern->options().perl_classes()));
963
+ return capturing_groups;
964
+ }
914
965
 
915
- rb_hash_aset(options, ID2SYM(id_word_boundary),
916
- BOOL2RUBY(p->pattern->options().word_boundary()));
966
+ /*
967
+ * Match the pattern against the given +text+ and return either
968
+ * a boolean (if no submatches are required) or a {RE2::MatchData}
969
+ * instance.
970
+ *
971
+ * @return [Boolean, RE2::MatchData]
972
+ *
973
+ * @overload match(text)
974
+ * Returns an {RE2::MatchData} containing the matching
975
+ * pattern and all subpatterns resulting from looking for
976
+ * the regexp in +text+.
977
+ *
978
+ * @param [String] text the text to search
979
+ * @return [RE2::MatchData] the matches
980
+ * @raise [NoMemoryError] if there was not enough memory to allocate the matches
981
+ * @example
982
+ * r = RE2::Regexp.new('w(o)(o)')
983
+ * r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
984
+ *
985
+ * @overload match(text, 0)
986
+ * Returns either true or false indicating whether a
987
+ * successful match was made.
988
+ *
989
+ * @param [String] text the text to search
990
+ * @return [Boolean] whether the match was successful
991
+ * @raise [NoMemoryError] if there was not enough memory to allocate the matches
992
+ * @example
993
+ * r = RE2::Regexp.new('w(o)(o)')
994
+ * r.match('woo', 0) #=> true
995
+ * r.match('bob', 0) #=> false
996
+ *
997
+ * @overload match(text, number_of_matches)
998
+ * See +match(text)+ but with a specific number of
999
+ * matches returned (padded with nils if necessary).
1000
+ *
1001
+ * @param [String] text the text to search
1002
+ * @param [Fixnum] number_of_matches the number of matches to return
1003
+ * @return [RE2::MatchData] the matches
1004
+ * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1005
+ * @example
1006
+ * r = RE2::Regexp.new('w(o)(o)')
1007
+ * r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
1008
+ * r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
1009
+ */
1010
+ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1011
+ int n;
1012
+ bool matched;
1013
+ re2_pattern *p;
1014
+ re2_matchdata *m;
1015
+ VALUE text, number_of_matches, matchdata;
917
1016
 
918
- rb_hash_aset(options, ID2SYM(id_one_line),
919
- BOOL2RUBY(p->pattern->options().one_line()));
1017
+ rb_scan_args(argc, argv, "11", &text, &number_of_matches);
920
1018
 
921
- /* This is a read-only hash after all... */
922
- rb_obj_freeze(options);
1019
+ /* Ensure text is a string. */
1020
+ text = StringValue(text);
923
1021
 
924
- return options;
925
- }
1022
+ Data_Get_Struct(self, re2_pattern, p);
926
1023
 
927
- /*
928
- * Returns the number of capturing subpatterns, or -1 if the regexp
929
- * wasn't valid on construction. The overall match ($0) does not
930
- * count: if the regexp is "(a)(b)", returns 2.
931
- *
932
- * @return [Fixnum] the number of capturing subpatterns
933
- */
934
- static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
935
- re2_pattern *p;
936
-
937
- Data_Get_Struct(self, re2_pattern, p);
938
- return INT2FIX(p->pattern->NumberOfCapturingGroups());
1024
+ if (RTEST(number_of_matches)) {
1025
+ n = NUM2INT(number_of_matches);
1026
+ } else {
1027
+ n = p->pattern->NumberOfCapturingGroups();
939
1028
  }
940
1029
 
941
- /*
942
- * Returns a hash of names to capturing indices of groups.
943
- *
944
- * @return [Hash] a hash of names to capturing indices
945
- */
946
- static VALUE re2_regexp_named_capturing_groups(VALUE self) {
947
- VALUE capturing_groups;
948
- re2_pattern *p;
949
- map<string, int> groups;
950
- map<string, int>::iterator iterator;
951
-
952
- Data_Get_Struct(self, re2_pattern, p);
953
- groups = p->pattern->NamedCapturingGroups();
954
- capturing_groups = rb_hash_new();
955
-
956
- for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
957
- rb_hash_aset(capturing_groups,
958
- ENCODED_STR_NEW(iterator->first.data(), iterator->first.size(),
959
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"),
960
- INT2FIX(iterator->second));
1030
+ if (n == 0) {
1031
+ matched = match(p->pattern, StringValuePtr(text), 0,
1032
+ static_cast<int>(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0);
1033
+ return BOOL2RUBY(matched);
1034
+ } else {
1035
+
1036
+ /* Because match returns the whole match as well. */
1037
+ n += 1;
1038
+
1039
+ matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1040
+ Data_Get_Struct(matchdata, re2_matchdata, m);
1041
+ m->matches = new(nothrow) re2::StringPiece[n];
1042
+ m->regexp = self;
1043
+ m->text = rb_str_dup(text);
1044
+ rb_str_freeze(m->text);
1045
+
1046
+ if (m->matches == 0) {
1047
+ rb_raise(rb_eNoMemError,
1048
+ "not enough memory to allocate StringPieces for matches");
961
1049
  }
962
1050
 
963
- return capturing_groups;
964
- }
1051
+ m->number_of_matches = n;
1052
+
1053
+ matched = match(p->pattern, StringValuePtr(text), 0,
1054
+ static_cast<int>(RSTRING_LEN(text)),
1055
+ RE2::UNANCHORED, m->matches, n);
965
1056
 
966
- /*
967
- * Match the pattern against the given +text+ and return either
968
- * a boolean (if no submatches are required) or a {RE2::MatchData}
969
- * instance.
970
- *
971
- * @return [Boolean, RE2::MatchData]
972
- *
973
- * @overload match(text)
974
- * Returns an {RE2::MatchData} containing the matching
975
- * pattern and all subpatterns resulting from looking for
976
- * the regexp in +text+.
977
- *
978
- * @param [String] text the text to search
979
- * @return [RE2::MatchData] the matches
980
- * @raise [NoMemoryError] if there was not enough memory to allocate the matches
981
- * @example
982
- * r = RE2::Regexp.new('w(o)(o)')
983
- * r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
984
- *
985
- * @overload match(text, 0)
986
- * Returns either true or false indicating whether a
987
- * successful match was made.
988
- *
989
- * @param [String] text the text to search
990
- * @return [Boolean] whether the match was successful
991
- * @raise [NoMemoryError] if there was not enough memory to allocate the matches
992
- * @example
993
- * r = RE2::Regexp.new('w(o)(o)')
994
- * r.match('woo', 0) #=> true
995
- * r.match('bob', 0) #=> false
996
- *
997
- * @overload match(text, number_of_matches)
998
- * See +match(text)+ but with a specific number of
999
- * matches returned (padded with nils if necessary).
1000
- *
1001
- * @param [String] text the text to search
1002
- * @param [Fixnum] number_of_matches the number of matches to return
1003
- * @return [RE2::MatchData] the matches
1004
- * @raise [NoMemoryError] if there was not enough memory to allocate the matches
1005
- * @example
1006
- * r = RE2::Regexp.new('w(o)(o)')
1007
- * r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
1008
- * r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
1009
- */
1010
- static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1011
- int n;
1012
- bool matched;
1013
- re2_pattern *p;
1014
- re2_matchdata *m;
1015
- VALUE text, number_of_matches, matchdata;
1016
-
1017
- rb_scan_args(argc, argv, "11", &text, &number_of_matches);
1018
-
1019
- Data_Get_Struct(self, re2_pattern, p);
1020
-
1021
- if (RTEST(number_of_matches)) {
1022
- n = NUM2INT(number_of_matches);
1057
+ if (matched) {
1058
+ return matchdata;
1023
1059
  } else {
1024
- n = p->pattern->NumberOfCapturingGroups();
1060
+ return Qnil;
1025
1061
  }
1062
+ }
1063
+ }
1026
1064
 
1027
- if (n == 0) {
1028
- matched = match(p->pattern, StringValuePtr(text), 0,
1029
- static_cast<int>(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0);
1030
- return BOOL2RUBY(matched);
1031
- } else {
1065
+ /*
1066
+ * Returns true or false to indicate a successful match.
1067
+ * Equivalent to +re2.match(text, 0)+.
1068
+ *
1069
+ * @return [Boolean] whether the match was successful
1070
+ */
1071
+ static VALUE re2_regexp_match_query(VALUE self, VALUE text) {
1072
+ VALUE argv[2];
1073
+ argv[0] = text;
1074
+ argv[1] = INT2FIX(0);
1032
1075
 
1033
- /* Because match returns the whole match as well. */
1034
- n += 1;
1076
+ return re2_regexp_match(2, argv, self);
1077
+ }
1035
1078
 
1036
- matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1037
- Data_Get_Struct(matchdata, re2_matchdata, m);
1038
- m->matches = new(nothrow) re2::StringPiece[n];
1039
- m->regexp = self;
1040
- m->text = rb_str_dup(text);
1041
- rb_str_freeze(m->text);
1079
+ /*
1080
+ * Returns a {RE2::Scanner} for scanning the given text incrementally.
1081
+ *
1082
+ * @example
1083
+ * c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
1084
+ */
1085
+ static VALUE re2_regexp_scan(VALUE self, VALUE text) {
1086
+ re2_pattern *p;
1087
+ re2_scanner *c;
1088
+ VALUE scanner;
1042
1089
 
1043
- if (m->matches == 0) {
1044
- rb_raise(rb_eNoMemError,
1045
- "not enough memory to allocate StringPieces for matches");
1046
- }
1090
+ Data_Get_Struct(self, re2_pattern, p);
1091
+ scanner = rb_class_new_instance(0, 0, re2_cScanner);
1092
+ Data_Get_Struct(scanner, re2_scanner, c);
1047
1093
 
1048
- m->number_of_matches = n;
1094
+ c->input = new(nothrow) re2::StringPiece(StringValuePtr(text));
1095
+ c->regexp = self;
1096
+ c->text = text;
1097
+ c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
1049
1098
 
1050
- matched = match(p->pattern, StringValuePtr(text), 0,
1051
- static_cast<int>(RSTRING_LEN(text)),
1052
- RE2::UNANCHORED, m->matches, n);
1099
+ return scanner;
1100
+ }
1053
1101
 
1054
- if (matched) {
1055
- return matchdata;
1056
- } else {
1057
- return Qnil;
1058
- }
1059
- }
1060
- }
1102
+ /*
1103
+ * Returns a copy of +str+ with the first occurrence +pattern+
1104
+ * replaced with +rewrite+.
1105
+ *
1106
+ * @param [String] str the string to modify
1107
+ * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1108
+ * @param [String] rewrite the string to replace with
1109
+ * @return [String] the resulting string
1110
+ * @example
1111
+ * RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
1112
+ * re2 = RE2.new("hel+o")
1113
+ * RE2.Replace("hello there", re2, "yo") #=> "yo there"
1114
+ */
1115
+ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
1116
+ VALUE rewrite) {
1117
+ UNUSED(self);
1118
+ re2_pattern *p;
1061
1119
 
1062
- /*
1063
- * Returns true or false to indicate a successful match.
1064
- * Equivalent to +re2.match(text, 0)+.
1065
- *
1066
- * @return [Boolean] whether the match was successful
1067
- */
1068
- static VALUE re2_regexp_match_query(VALUE self, VALUE text) {
1069
- VALUE argv[2];
1070
- argv[0] = text;
1071
- argv[1] = INT2FIX(0);
1072
-
1073
- return re2_regexp_match(2, argv, self);
1074
- }
1120
+ /* Convert all the inputs to be pumped into RE2::Replace. */
1121
+ string str_as_string(StringValuePtr(str));
1075
1122
 
1076
- /*
1077
- * Returns a {RE2::Scanner} for scanning the given text incrementally.
1078
- *
1079
- * @example
1080
- * c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
1081
- */
1082
- static VALUE re2_regexp_scan(VALUE self, VALUE text) {
1083
- re2_pattern *p;
1084
- re2_scanner *c;
1085
- VALUE scanner;
1086
-
1087
- Data_Get_Struct(self, re2_pattern, p);
1088
- scanner = rb_class_new_instance(0, 0, re2_cScanner);
1089
- Data_Get_Struct(scanner, re2_scanner, c);
1090
-
1091
- c->input = new(nothrow) re2::StringPiece(StringValuePtr(text));
1092
- c->regexp = self;
1093
- c->text = text;
1094
- c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
1095
-
1096
- return scanner;
1123
+ /* Do the replacement. */
1124
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1125
+ Data_Get_Struct(pattern, re2_pattern, p);
1126
+ RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1127
+
1128
+ return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1129
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
1130
+ } else {
1131
+ RE2::Replace(&str_as_string, StringValuePtr(pattern),
1132
+ StringValuePtr(rewrite));
1133
+
1134
+ return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(),
1135
+ pattern);
1097
1136
  }
1098
1137
 
1099
- /*
1100
- * Returns a copy of +str+ with the first occurrence +pattern+
1101
- * replaced with +rewrite+.
1102
- *
1103
- * @param [String] str the string to modify
1104
- * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1105
- * @param [String] rewrite the string to replace with
1106
- * @return [String] the resulting string
1107
- * @example
1108
- * RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
1109
- * re2 = RE2.new("hel+o")
1110
- * RE2.Replace("hello there", re2, "yo") #=> "yo there"
1111
- */
1112
- static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
1113
- VALUE rewrite) {
1114
- UNUSED(self);
1115
- re2_pattern *p;
1116
-
1117
- /* Convert all the inputs to be pumped into RE2::Replace. */
1118
- string str_as_string(StringValuePtr(str));
1119
-
1120
- /* Do the replacement. */
1121
- if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1122
- Data_Get_Struct(pattern, re2_pattern, p);
1123
- RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1124
-
1125
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1126
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
1127
- } else {
1128
- RE2::Replace(&str_as_string, StringValuePtr(pattern),
1129
- StringValuePtr(rewrite));
1138
+ }
1130
1139
 
1131
- return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(),
1132
- pattern);
1133
- }
1140
+ /*
1141
+ * Return a copy of +str+ with +pattern+ replaced by +rewrite+.
1142
+ *
1143
+ * @param [String] str the string to modify
1144
+ * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1145
+ * @param [String] rewrite the string to replace with
1146
+ * @return [String] the resulting string
1147
+ * @example
1148
+ * re2 = RE2.new("oo?")
1149
+ * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
1150
+ * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
1151
+ */
1152
+ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
1153
+ VALUE rewrite) {
1154
+ UNUSED(self);
1134
1155
 
1135
- }
1156
+ /* Convert all the inputs to be pumped into RE2::GlobalReplace. */
1157
+ re2_pattern *p;
1158
+ string str_as_string(StringValuePtr(str));
1136
1159
 
1137
- /*
1138
- * Return a copy of +str+ with +pattern+ replaced by +rewrite+.
1139
- *
1140
- * @param [String] str the string to modify
1141
- * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1142
- * @param [String] rewrite the string to replace with
1143
- * @return [String] the resulting string
1144
- * @example
1145
- * re2 = RE2.new("oo?")
1146
- * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
1147
- * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
1148
- */
1149
- static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
1150
- VALUE rewrite) {
1151
- UNUSED(self);
1152
-
1153
- /* Convert all the inputs to be pumped into RE2::GlobalReplace. */
1154
- re2_pattern *p;
1155
- string str_as_string(StringValuePtr(str));
1156
-
1157
- /* Do the replacement. */
1158
- if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1159
- Data_Get_Struct(pattern, re2_pattern, p);
1160
- RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1161
-
1162
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1163
- p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
1164
- } else {
1165
- RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
1166
- StringValuePtr(rewrite));
1160
+ /* Do the replacement. */
1161
+ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1162
+ Data_Get_Struct(pattern, re2_pattern, p);
1163
+ RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1167
1164
 
1168
- return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(),
1169
- pattern);
1170
- }
1171
- }
1165
+ return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1166
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
1167
+ } else {
1168
+ RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
1169
+ StringValuePtr(rewrite));
1172
1170
 
1173
- /*
1174
- * Returns a version of str with all potentially meaningful regexp
1175
- * characters escaped. The returned string, used as a regular
1176
- * expression, will exactly match the original string.
1177
- *
1178
- * @param [String] unquoted the unquoted string
1179
- * @return [String] the escaped string
1180
- * @example
1181
- * RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
1182
- */
1183
- static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
1184
- UNUSED(self);
1185
- string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
1186
- return rb_str_new(quoted_string.data(), quoted_string.size());
1171
+ return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(),
1172
+ pattern);
1187
1173
  }
1174
+ }
1188
1175
 
1189
- void Init_re2(void) {
1190
- re2_mRE2 = rb_define_module("RE2");
1191
- re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
1192
- re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
1193
- re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
1194
-
1195
- rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
1196
- rb_define_alloc_func(re2_cMatchData,
1197
- (VALUE (*)(VALUE))re2_matchdata_allocate);
1198
- rb_define_alloc_func(re2_cScanner,
1199
- (VALUE (*)(VALUE))re2_scanner_allocate);
1200
-
1201
- rb_define_method(re2_cMatchData, "string",
1202
- RUBY_METHOD_FUNC(re2_matchdata_string), 0);
1203
- rb_define_method(re2_cMatchData, "regexp",
1204
- RUBY_METHOD_FUNC(re2_matchdata_regexp), 0);
1205
- rb_define_method(re2_cMatchData, "to_a",
1206
- RUBY_METHOD_FUNC(re2_matchdata_to_a), 0);
1207
- rb_define_method(re2_cMatchData, "size",
1208
- RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1209
- rb_define_method(re2_cMatchData, "length",
1210
- RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1211
- rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
1212
- -1); rb_define_method(re2_cMatchData, "to_s",
1213
- RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
1214
- rb_define_method(re2_cMatchData, "inspect",
1215
- RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
1216
-
1217
- rb_define_method(re2_cScanner, "string",
1218
- RUBY_METHOD_FUNC(re2_scanner_string), 0);
1219
- rb_define_method(re2_cScanner, "regexp",
1220
- RUBY_METHOD_FUNC(re2_scanner_regexp), 0);
1221
- rb_define_method(re2_cScanner, "scan",
1222
- RUBY_METHOD_FUNC(re2_scanner_scan), 0);
1223
- rb_define_method(re2_cScanner, "rewind",
1224
- RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
1225
-
1226
- rb_define_method(re2_cRegexp, "initialize",
1227
- RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
1228
- rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
1229
- rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
1230
- 0);
1231
- rb_define_method(re2_cRegexp, "error_arg",
1232
- RUBY_METHOD_FUNC(re2_regexp_error_arg), 0);
1233
- rb_define_method(re2_cRegexp, "program_size",
1234
- RUBY_METHOD_FUNC(re2_regexp_program_size), 0);
1235
- rb_define_method(re2_cRegexp, "options",
1236
- RUBY_METHOD_FUNC(re2_regexp_options), 0);
1237
- rb_define_method(re2_cRegexp, "number_of_capturing_groups",
1238
- RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
1239
- rb_define_method(re2_cRegexp, "named_capturing_groups",
1240
- RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
1241
- rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
1242
- -1);
1243
- rb_define_method(re2_cRegexp, "match?",
1244
- RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1245
- rb_define_method(re2_cRegexp, "=~",
1246
- RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1247
- rb_define_method(re2_cRegexp, "===",
1248
- RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1249
- rb_define_method(re2_cRegexp, "scan",
1250
- RUBY_METHOD_FUNC(re2_regexp_scan), 1);
1251
- rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
1252
- rb_define_method(re2_cRegexp, "to_str", RUBY_METHOD_FUNC(re2_regexp_to_s),
1253
- 0);
1254
- rb_define_method(re2_cRegexp, "pattern", RUBY_METHOD_FUNC(re2_regexp_to_s),
1255
- 0);
1256
- rb_define_method(re2_cRegexp, "source", RUBY_METHOD_FUNC(re2_regexp_to_s),
1257
- 0);
1258
- rb_define_method(re2_cRegexp, "inspect",
1259
- RUBY_METHOD_FUNC(re2_regexp_inspect), 0);
1260
- rb_define_method(re2_cRegexp, "utf8?", RUBY_METHOD_FUNC(re2_regexp_utf8),
1261
- 0);
1262
- rb_define_method(re2_cRegexp, "posix_syntax?",
1263
- RUBY_METHOD_FUNC(re2_regexp_posix_syntax), 0);
1264
- rb_define_method(re2_cRegexp, "longest_match?",
1265
- RUBY_METHOD_FUNC(re2_regexp_longest_match), 0);
1266
- rb_define_method(re2_cRegexp, "log_errors?",
1267
- RUBY_METHOD_FUNC(re2_regexp_log_errors), 0);
1268
- rb_define_method(re2_cRegexp, "max_mem",
1269
- RUBY_METHOD_FUNC(re2_regexp_max_mem), 0);
1270
- rb_define_method(re2_cRegexp, "literal?",
1271
- RUBY_METHOD_FUNC(re2_regexp_literal), 0);
1272
- rb_define_method(re2_cRegexp, "never_nl?",
1273
- RUBY_METHOD_FUNC(re2_regexp_never_nl), 0);
1274
- rb_define_method(re2_cRegexp, "case_sensitive?",
1275
- RUBY_METHOD_FUNC(re2_regexp_case_sensitive), 0);
1276
- rb_define_method(re2_cRegexp, "case_insensitive?",
1277
- RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
1278
- rb_define_method(re2_cRegexp, "casefold?",
1279
- RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
1280
- rb_define_method(re2_cRegexp, "perl_classes?",
1281
- RUBY_METHOD_FUNC(re2_regexp_perl_classes), 0);
1282
- rb_define_method(re2_cRegexp, "word_boundary?",
1283
- RUBY_METHOD_FUNC(re2_regexp_word_boundary), 0);
1284
- rb_define_method(re2_cRegexp, "one_line?",
1285
- RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
1286
-
1287
- rb_define_module_function(re2_mRE2, "Replace",
1288
- RUBY_METHOD_FUNC(re2_Replace), 3);
1289
- rb_define_module_function(re2_mRE2, "GlobalReplace",
1290
- RUBY_METHOD_FUNC(re2_GlobalReplace), 3);
1291
- rb_define_module_function(re2_mRE2, "QuoteMeta",
1292
- RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1293
- rb_define_singleton_method(re2_cRegexp, "escape",
1294
- RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1295
- rb_define_singleton_method(re2_cRegexp, "quote",
1296
- RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1297
- rb_define_singleton_method(re2_cRegexp, "compile",
1298
- RUBY_METHOD_FUNC(rb_class_new_instance), -1);
1299
-
1300
- rb_define_global_function("RE2", RUBY_METHOD_FUNC(re2_re2), -1);
1301
-
1302
- /* Create the symbols used in options. */
1303
- id_utf8 = rb_intern("utf8");
1304
- id_posix_syntax = rb_intern("posix_syntax");
1305
- id_longest_match = rb_intern("longest_match");
1306
- id_log_errors = rb_intern("log_errors");
1307
- id_max_mem = rb_intern("max_mem");
1308
- id_literal = rb_intern("literal");
1309
- id_never_nl = rb_intern("never_nl");
1310
- id_case_sensitive = rb_intern("case_sensitive");
1311
- id_perl_classes = rb_intern("perl_classes");
1312
- id_word_boundary = rb_intern("word_boundary");
1313
- id_one_line = rb_intern("one_line");
1314
-
1315
- #if 0
1316
- /* Fake so YARD generates the file. */
1317
- rb_mKernel = rb_define_module("Kernel");
1318
- #endif
1319
- }
1176
+ /*
1177
+ * Returns a version of str with all potentially meaningful regexp
1178
+ * characters escaped. The returned string, used as a regular
1179
+ * expression, will exactly match the original string.
1180
+ *
1181
+ * @param [String] unquoted the unquoted string
1182
+ * @return [String] the escaped string
1183
+ * @example
1184
+ * RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
1185
+ */
1186
+ static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
1187
+ UNUSED(self);
1188
+ string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
1189
+ return rb_str_new(quoted_string.data(), quoted_string.size());
1190
+ }
1191
+
1192
+ /* Forward declare Init_re2 to be called by C code but define it separately so
1193
+ * that YARD can parse it.
1194
+ */
1195
+ extern "C" void Init_re2(void);
1196
+
1197
+ void Init_re2(void) {
1198
+ re2_mRE2 = rb_define_module("RE2");
1199
+ re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
1200
+ re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
1201
+ re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
1202
+
1203
+ rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
1204
+ rb_define_alloc_func(re2_cMatchData,
1205
+ (VALUE (*)(VALUE))re2_matchdata_allocate);
1206
+ rb_define_alloc_func(re2_cScanner,
1207
+ (VALUE (*)(VALUE))re2_scanner_allocate);
1208
+
1209
+ rb_define_method(re2_cMatchData, "string",
1210
+ RUBY_METHOD_FUNC(re2_matchdata_string), 0);
1211
+ rb_define_method(re2_cMatchData, "regexp",
1212
+ RUBY_METHOD_FUNC(re2_matchdata_regexp), 0);
1213
+ rb_define_method(re2_cMatchData, "to_a",
1214
+ RUBY_METHOD_FUNC(re2_matchdata_to_a), 0);
1215
+ rb_define_method(re2_cMatchData, "size",
1216
+ RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1217
+ rb_define_method(re2_cMatchData, "length",
1218
+ RUBY_METHOD_FUNC(re2_matchdata_size), 0);
1219
+ rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
1220
+ -1); rb_define_method(re2_cMatchData, "to_s",
1221
+ RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
1222
+ rb_define_method(re2_cMatchData, "inspect",
1223
+ RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
1224
+
1225
+ rb_define_method(re2_cScanner, "string",
1226
+ RUBY_METHOD_FUNC(re2_scanner_string), 0);
1227
+ rb_define_method(re2_cScanner, "regexp",
1228
+ RUBY_METHOD_FUNC(re2_scanner_regexp), 0);
1229
+ rb_define_method(re2_cScanner, "scan",
1230
+ RUBY_METHOD_FUNC(re2_scanner_scan), 0);
1231
+ rb_define_method(re2_cScanner, "rewind",
1232
+ RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
1233
+
1234
+ rb_define_method(re2_cRegexp, "initialize",
1235
+ RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
1236
+ rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
1237
+ rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
1238
+ 0);
1239
+ rb_define_method(re2_cRegexp, "error_arg",
1240
+ RUBY_METHOD_FUNC(re2_regexp_error_arg), 0);
1241
+ rb_define_method(re2_cRegexp, "program_size",
1242
+ RUBY_METHOD_FUNC(re2_regexp_program_size), 0);
1243
+ rb_define_method(re2_cRegexp, "options",
1244
+ RUBY_METHOD_FUNC(re2_regexp_options), 0);
1245
+ rb_define_method(re2_cRegexp, "number_of_capturing_groups",
1246
+ RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
1247
+ rb_define_method(re2_cRegexp, "named_capturing_groups",
1248
+ RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
1249
+ rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
1250
+ -1);
1251
+ rb_define_method(re2_cRegexp, "match?",
1252
+ RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1253
+ rb_define_method(re2_cRegexp, "=~",
1254
+ RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1255
+ rb_define_method(re2_cRegexp, "===",
1256
+ RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1257
+ rb_define_method(re2_cRegexp, "scan",
1258
+ RUBY_METHOD_FUNC(re2_regexp_scan), 1);
1259
+ rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
1260
+ rb_define_method(re2_cRegexp, "to_str", RUBY_METHOD_FUNC(re2_regexp_to_s),
1261
+ 0);
1262
+ rb_define_method(re2_cRegexp, "pattern", RUBY_METHOD_FUNC(re2_regexp_to_s),
1263
+ 0);
1264
+ rb_define_method(re2_cRegexp, "source", RUBY_METHOD_FUNC(re2_regexp_to_s),
1265
+ 0);
1266
+ rb_define_method(re2_cRegexp, "inspect",
1267
+ RUBY_METHOD_FUNC(re2_regexp_inspect), 0);
1268
+ rb_define_method(re2_cRegexp, "utf8?", RUBY_METHOD_FUNC(re2_regexp_utf8),
1269
+ 0);
1270
+ rb_define_method(re2_cRegexp, "posix_syntax?",
1271
+ RUBY_METHOD_FUNC(re2_regexp_posix_syntax), 0);
1272
+ rb_define_method(re2_cRegexp, "longest_match?",
1273
+ RUBY_METHOD_FUNC(re2_regexp_longest_match), 0);
1274
+ rb_define_method(re2_cRegexp, "log_errors?",
1275
+ RUBY_METHOD_FUNC(re2_regexp_log_errors), 0);
1276
+ rb_define_method(re2_cRegexp, "max_mem",
1277
+ RUBY_METHOD_FUNC(re2_regexp_max_mem), 0);
1278
+ rb_define_method(re2_cRegexp, "literal?",
1279
+ RUBY_METHOD_FUNC(re2_regexp_literal), 0);
1280
+ rb_define_method(re2_cRegexp, "never_nl?",
1281
+ RUBY_METHOD_FUNC(re2_regexp_never_nl), 0);
1282
+ rb_define_method(re2_cRegexp, "case_sensitive?",
1283
+ RUBY_METHOD_FUNC(re2_regexp_case_sensitive), 0);
1284
+ rb_define_method(re2_cRegexp, "case_insensitive?",
1285
+ RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
1286
+ rb_define_method(re2_cRegexp, "casefold?",
1287
+ RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
1288
+ rb_define_method(re2_cRegexp, "perl_classes?",
1289
+ RUBY_METHOD_FUNC(re2_regexp_perl_classes), 0);
1290
+ rb_define_method(re2_cRegexp, "word_boundary?",
1291
+ RUBY_METHOD_FUNC(re2_regexp_word_boundary), 0);
1292
+ rb_define_method(re2_cRegexp, "one_line?",
1293
+ RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
1294
+
1295
+ rb_define_module_function(re2_mRE2, "Replace",
1296
+ RUBY_METHOD_FUNC(re2_Replace), 3);
1297
+ rb_define_module_function(re2_mRE2, "GlobalReplace",
1298
+ RUBY_METHOD_FUNC(re2_GlobalReplace), 3);
1299
+ rb_define_module_function(re2_mRE2, "QuoteMeta",
1300
+ RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1301
+ rb_define_singleton_method(re2_cRegexp, "escape",
1302
+ RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1303
+ rb_define_singleton_method(re2_cRegexp, "quote",
1304
+ RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
1305
+ rb_define_singleton_method(re2_cRegexp, "compile",
1306
+ RUBY_METHOD_FUNC(rb_class_new_instance), -1);
1307
+
1308
+ rb_define_global_function("RE2", RUBY_METHOD_FUNC(re2_re2), -1);
1309
+
1310
+ /* Create the symbols used in options. */
1311
+ id_utf8 = rb_intern("utf8");
1312
+ id_posix_syntax = rb_intern("posix_syntax");
1313
+ id_longest_match = rb_intern("longest_match");
1314
+ id_log_errors = rb_intern("log_errors");
1315
+ id_max_mem = rb_intern("max_mem");
1316
+ id_literal = rb_intern("literal");
1317
+ id_never_nl = rb_intern("never_nl");
1318
+ id_case_sensitive = rb_intern("case_sensitive");
1319
+ id_perl_classes = rb_intern("perl_classes");
1320
+ id_word_boundary = rb_intern("word_boundary");
1321
+ id_one_line = rb_intern("one_line");
1322
+
1323
+ #if 0
1324
+ /* Fake so YARD generates the file. */
1325
+ rb_mKernel = rb_define_module("Kernel");
1326
+ #endif
1320
1327
  }