re2 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,28 @@
1
+ Copyright (c) 2010, Paul Mucur.
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+
10
+ * Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ * Neither the name of Paul Mucur, nor the names of its contributors may be
15
+ used to endorse or promote products derived from this software without
16
+ specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
@@ -0,0 +1,70 @@
1
+ re2
2
+ ===
3
+
4
+ A Ruby binding to [re2][], an "efficient, principled regular expression library".
5
+
6
+ Installation
7
+ ------------
8
+
9
+ You will need [re2][] installed in its default location of /usr/local as well as a C++ compiler such as [gcc][] (on Debian and Ubuntu, this is provided by the [build-essential][] package).
10
+
11
+ If you are using a packaged Ruby distribution, make sure you also have the Ruby header files installed such as those provided by the [ruby-dev][] package on Debian and Ubuntu.
12
+
13
+ You can then install the library via RubyGems: `gem install re2`
14
+
15
+ Usage
16
+ -----
17
+
18
+ You can use re2 as a mostly drop-in replacement for Ruby's own [Regexp][] class:
19
+
20
+ $ irb -rubygems
21
+ > require 're2'
22
+ > r = RE2.compile('w(\d)(\d+)')
23
+ => /w(\d)(\d+)/
24
+ > r.match("w1234")
25
+ => ["w1234", "1", "234"]
26
+ > r =~ "w1234"
27
+ => true
28
+ > r !~ "bob"
29
+ => true
30
+ > r.match("bob")
31
+ => nil
32
+
33
+ Features
34
+ --------
35
+
36
+ * Pre-compiling regular expressions with [`RE2.new(re)`](http://code.google.com/p/re2/source/browse/re2/re2.h#96), `RE2.compile(re)` or `RE2(re)` (including specifying options, e.g. `RE2.new("pattern", :case_sensitive => false)`)
37
+
38
+ * Extracting matches with `re2.match(text)` (and an exact number of matches with `re2.match(text, number_of_matches)` such as `re2.match("123-234", 2)`)
39
+
40
+ * Checking for matches with `re2 =~ text`, `re2 === text` (for use in `case` statements) and `re2 !~ text`
41
+
42
+ * Checking regular expression compilation with `re2.ok?`, `re2.error` and `re2.error_arg`
43
+
44
+ * Checking regular expression "cost" with `re2.program_size`
45
+
46
+ * Checking the options for an expression with `re2.options` or individually with `re2.case_sensitive?`
47
+
48
+ * Performing full matches with [`RE2::FullMatch(text, re)`](http://code.google.com/p/re2/source/browse/re2/re2.h#30)
49
+
50
+ * Performing partial matches with [`RE2::PartialMatch(text, re)`](http://code.google.com/p/re2/source/browse/re2/re2.h#82)
51
+
52
+ * Performing in-place replacement with [`RE2::Replace(str, pattern, replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#335)
53
+
54
+ * Performing in-place global replacement with [`RE2::GlobalReplace(str, pattern, replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#352)
55
+
56
+ * Escaping regular expressions with [`RE2::QuoteMeta(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377), `RE2.escape(unquoted)` or `RE2.quote(unquoted)`
57
+
58
+ re2.cc should be well-documented so feel free to consult this file to see what can currently be used.
59
+
60
+ Contact
61
+ -------
62
+
63
+ All feedback should go to the mailing list: ruby.re2@librelist.com
64
+
65
+ [re2]: http://code.google.com/p/re2/
66
+ [gcc]: http://gcc.gnu.org/
67
+ [ruby-dev]: http://packages.debian.org/ruby-dev
68
+ [build-essential]: http://packages.debian.org/build-essential
69
+ [Regexp]: http://ruby-doc.org/core/classes/Regexp.html
70
+
@@ -0,0 +1,19 @@
1
# Libraries that supply the extension-compilation and test tasks.
rake_libraries = %w[rake/extensiontask rake/testtask]

begin
  rake_libraries.each { |library| require library }
rescue LoadError
  # The libraries were not on the load path: load RubyGems and try again.
  require 'rubygems'
  rake_libraries.each { |library| require library }
end

# Build task for the "re2" extension.
Rake::ExtensionTask.new('re2')

# Run every file matching test/*_test.rb, verbosely.
Rake::TestTask.new do |test_task|
  test_task.verbose = true
  test_task.test_files = FileList["test/*_test.rb"]
end

# The extension must be compiled before the tests run; testing is the default.
task :test => :compile
task :default => :test
19
+
@@ -0,0 +1,18 @@
1
# re2 (http://github.com/mudge/re2)
# Ruby bindings to re2, an "efficient, principled regular expression library"
#
# Copyright (c) 2010, Paul Mucur (http://mucur.name)
# Released under the BSD Licence, please see LICENSE.txt

require 'mkmf'

# Default to headers and libraries under /usr/local; the abort message below
# points users at --with-re2-dir to override this.
dir_config("re2", "/usr/local/include", "/usr/local/lib")

# Extra compiler flags: more warnings plus loop unrolling.
$CFLAGS.concat(" -Wall -Wextra -funroll-loops")

# Probe for the C++ standard library; the result is deliberately not checked.
have_library("stdc++")

# Without libre2 there is nothing to build, so stop with installation advice.
unless have_library("re2")
  abort "You must have re2 installed and specified with --with-re2-dir, please see http://code.google.com/p/re2/wiki/Install"
end

create_makefile("re2")
@@ -0,0 +1,1026 @@
1
+ /*
2
+ * re2 (http://github.com/mudge/re2)
3
+ * Ruby bindings to re2, an "efficient, principled regular expression library"
4
+ *
5
+ * Copyright (c) 2010, Paul Mucur (http://mucur.name)
6
+ * Released under the BSD Licence, please see LICENSE.txt
7
+ */
8
+
9
+ #include <re2/re2.h>
10
+
11
+ extern "C" {
12
+
13
+ #include <ruby.h>
14
+
15
+ #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
16
+ #define UNUSED(x) ((void)x)
17
+
18
+ #if !defined(RSTRING_LEN)
19
+ # define RSTRING_LEN(x) (RSTRING(x)->len)
20
+ #endif
21
+
22
+ typedef struct _re2p {
23
+ RE2 *pattern;
24
+ } re2_pattern;
25
+
26
+ VALUE re2_cRE2;
27
+
28
+ /* Symbols used in RE2 options. */
29
+ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
30
+ id_max_mem, id_literal, id_never_nl, id_case_sensitive,
31
+ id_perl_classes, id_word_boundary, id_one_line;
32
+
33
+ void
34
+ re2_free(re2_pattern* self)
35
+ {
36
+ free(self);
37
+ }
38
+
39
+ static VALUE
40
+ re2_allocate(VALUE klass)
41
+ {
42
+ re2_pattern *p = (re2_pattern*)malloc(sizeof(re2_pattern));
43
+ p->pattern = NULL;
44
+ return Data_Wrap_Struct(klass, 0, re2_free, p);
45
+ }
46
+
47
+ /*
48
+ * call-seq:
49
+ * RE2(pattern) -> re2
50
+ * RE2(pattern, options) -> re2
51
+ *
52
+ * Returns a new RE2 object with a compiled version of
53
+ * +pattern+ stored inside. Equivalent to +RE2.new+.
54
+ */
55
+ static VALUE
56
+ re2_re2(int argc, VALUE *argv, VALUE self)
57
+ {
58
+ UNUSED(self);
59
+ return rb_class_new_instance(argc, argv, re2_cRE2);
60
+ }
61
+
62
+ /*
63
+ * call-seq:
64
+ * RE2.new(pattern) -> re2
65
+ * RE2.new(pattern, options) -> re2
66
+ * RE2.compile(pattern) -> re2
67
+ * RE2.compile(pattern, options) -> re2
68
+ *
69
+ * Returns a new RE2 object with a compiled version of
70
+ * +pattern+ stored inside.
71
+ *
72
+ * Options can be a hash with the following keys:
73
+ *
74
+ * :utf8 - text and pattern are UTF-8; otherwise
75
+ * Latin-1 (default true)
76
+ *
77
+ * :posix_syntax - restrict regexps to POSIX egrep syntax
78
+ * (default false)
79
+ *
80
+ * :longest_match - search for longest match, not first match
81
+ * (default false)
82
+ *
83
+ * :log_errors - log syntax and execution errors to ERROR
84
+ * (default true)
85
+ *
86
+ * :max_mem - approx. max memory footprint of RE2
87
+ *
88
+ * :literal - interpret string as literal, not regexp
89
+ * (default false)
90
+ *
91
+ * :never_nl - never match \n, even if it is in regexp
92
+ * (default false)
93
+ *
94
+ * :case_sensitive - match is case-sensitive (regexp can override
95
+ * with (?i) unless in posix_syntax mode)
96
+ * (default true)
97
+ *
98
+ * :perl_classes - allow Perl's \d \s \w \D \S \W when in
99
+ * posix_syntax mode (default false)
100
+ *
101
+ * :word_boundary - allow \b \B (word boundary and not) when
102
+ * in posix_syntax mode (default false)
103
+ *
104
+ * :one_line - ^ and $ only match beginning and end of text
105
+ * when in posix_syntax mode (default false)
106
+ */
107
+ static VALUE
108
+ re2_initialize(int argc, VALUE *argv, VALUE self)
109
+ {
110
+ VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors,
111
+ max_mem, literal, never_nl, case_sensitive, perl_classes,
112
+ word_boundary, one_line;
113
+ re2_pattern *p;
114
+ RE2::Options *re2_options;
115
+
116
+ rb_scan_args(argc, argv, "11", &pattern, &options);
117
+ Data_Get_Struct(self, re2_pattern, p);
118
+
119
+ if (RTEST(options)) {
120
+ if (TYPE(options) != T_HASH) {
121
+ rb_raise(rb_eArgError, "options should be a hash");
122
+ }
123
+
124
+ re2_options = new RE2::Options();
125
+
126
+ utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
127
+ if (!NIL_P(utf8)) {
128
+ re2_options->set_utf8(RTEST(utf8));
129
+ }
130
+
131
+ posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
132
+ if (!NIL_P(posix_syntax)) {
133
+ re2_options->set_posix_syntax(RTEST(posix_syntax));
134
+ }
135
+
136
+ longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
137
+ if (!NIL_P(longest_match)) {
138
+ re2_options->set_longest_match(RTEST(longest_match));
139
+ }
140
+
141
+ log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
142
+ if (!NIL_P(log_errors)) {
143
+ re2_options->set_log_errors(RTEST(log_errors));
144
+ }
145
+
146
+ max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
147
+ if (!NIL_P(max_mem)) {
148
+ re2_options->set_max_mem(NUM2INT(max_mem));
149
+ }
150
+
151
+ literal = rb_hash_aref(options, ID2SYM(id_literal));
152
+ if (!NIL_P(literal)) {
153
+ re2_options->set_literal(RTEST(literal));
154
+ }
155
+
156
+ never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
157
+ if (!NIL_P(never_nl)) {
158
+ re2_options->set_never_nl(RTEST(never_nl));
159
+ }
160
+
161
+ case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
162
+ if (!NIL_P(case_sensitive)) {
163
+ re2_options->set_case_sensitive(RTEST(case_sensitive));
164
+ }
165
+
166
+ perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
167
+ if (!NIL_P(perl_classes)) {
168
+ re2_options->set_perl_classes(RTEST(perl_classes));
169
+ }
170
+
171
+ word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
172
+ if (!NIL_P(word_boundary)) {
173
+ re2_options->set_word_boundary(RTEST(word_boundary));
174
+ }
175
+
176
+ one_line = rb_hash_aref(options, ID2SYM(id_one_line));
177
+ if (!NIL_P(one_line)) {
178
+ re2_options->set_one_line(RTEST(one_line));
179
+ }
180
+
181
+ p->pattern = new RE2(StringValuePtr(pattern), *re2_options);
182
+ } else {
183
+ p->pattern = new RE2(StringValuePtr(pattern));
184
+ }
185
+
186
+ return self;
187
+ }
188
+
189
+ /*
190
+ * call-seq:
191
+ * re2.inspect -> string
192
+ *
193
+ * Returns a printable version of the regular expression +re2+,
194
+ * surrounded by forward slashes.
195
+ *
196
+ * re2 = RE2.new("woo?")
197
+ * re2.inspect #=> "/woo?/"
198
+ */
199
+ static VALUE
200
+ re2_inspect(VALUE self)
201
+ {
202
+ VALUE result = rb_str_buf_new(0);
203
+ re2_pattern *p;
204
+
205
+ rb_str_buf_cat2(result, "/");
206
+ Data_Get_Struct(self, re2_pattern, p);
207
+ rb_str_buf_cat2(result, p->pattern->pattern().c_str());
208
+ rb_str_buf_cat2(result, "/");
209
+
210
+ return result;
211
+ }
212
+
213
+ /*
214
+ * call-seq:
215
+ * re2.to_s -> string
216
+ * re2.to_str -> string
217
+ * re2.pattern -> string
218
+ * re2.source -> string
219
+ * re2.inspect -> string
220
+ *
221
+ * Returns a string version of the regular expression +re2+.
222
+ *
223
+ * re2 = RE2.new("woo?")
224
+ * re2.to_s #=> "woo?"
225
+ */
226
+ static VALUE
227
+ re2_to_s(VALUE self)
228
+ {
229
+ re2_pattern *p;
230
+ Data_Get_Struct(self, re2_pattern, p);
231
+ return rb_str_new2(p->pattern->pattern().c_str());
232
+ }
233
+
234
+ /*
235
+ * call-seq:
236
+ * re2.ok? -> true or false
237
+ *
238
+ * Returns whether or not the regular expression +re2+
239
+ * was compiled successfully or not.
240
+ *
241
+ * re2 = RE2.new("woo?")
242
+ * re2.ok? #=> true
243
+ */
244
+ static VALUE
245
+ re2_ok(VALUE self)
246
+ {
247
+ re2_pattern *p;
248
+ Data_Get_Struct(self, re2_pattern, p);
249
+ return BOOL2RUBY(p->pattern->ok());
250
+ }
251
+
252
+ /*
253
+ * call-seq:
254
+ * re2.utf8? -> true or false
255
+ *
256
+ * Returns whether or not the regular expression +re2+
257
+ * was compiled with the utf8 option set to true.
258
+ *
259
+ * re2 = RE2.new("woo?", :utf8 => true)
260
+ * re2.utf8? #=> true
261
+ */
262
+ static VALUE
263
+ re2_utf8(VALUE self)
264
+ {
265
+ re2_pattern *p;
266
+ Data_Get_Struct(self, re2_pattern, p);
267
+ return BOOL2RUBY(p->pattern->options().utf8());
268
+ }
269
+
270
+ /*
271
+ * call-seq:
272
+ * re2.posix_syntax? -> true or false
273
+ *
274
+ * Returns whether or not the regular expression +re2+
275
+ * was compiled with the posix_syntax option set to true.
276
+ *
277
+ * re2 = RE2.new("woo?", :posix_syntax => true)
278
+ * re2.posix_syntax? #=> true
279
+ */
280
+ static VALUE
281
+ re2_posix_syntax(VALUE self)
282
+ {
283
+ re2_pattern *p;
284
+ Data_Get_Struct(self, re2_pattern, p);
285
+ return BOOL2RUBY(p->pattern->options().posix_syntax());
286
+ }
287
+
288
+ /*
289
+ * call-seq:
290
+ * re2.longest_match? -> true or false
291
+ *
292
+ * Returns whether or not the regular expression +re2+
293
+ * was compiled with the longest_match option set to true.
294
+ *
295
+ * re2 = RE2.new("woo?", :longest_match => true)
296
+ * re2.longest_match? #=> true
297
+ */
298
+ static VALUE
299
+ re2_longest_match(VALUE self)
300
+ {
301
+ re2_pattern *p;
302
+ Data_Get_Struct(self, re2_pattern, p);
303
+ return BOOL2RUBY(p->pattern->options().longest_match());
304
+ }
305
+
306
+ /*
307
+ * call-seq:
308
+ * re2.log_errors? -> true or false
309
+ *
310
+ * Returns whether or not the regular expression +re2+
311
+ * was compiled with the log_errors option set to true.
312
+ *
313
+ * re2 = RE2.new("woo?", :log_errors => true)
314
+ * re2.log_errors? #=> true
315
+ */
316
+ static VALUE
317
+ re2_log_errors(VALUE self)
318
+ {
319
+ re2_pattern *p;
320
+ Data_Get_Struct(self, re2_pattern, p);
321
+ return BOOL2RUBY(p->pattern->options().log_errors());
322
+ }
323
+
324
+ /*
325
+ * call-seq:
326
+ * re2.max_mem -> int
327
+ *
328
+ * Returns the max_mem setting for the regular expression
329
+ * +re2+.
330
+ *
331
+ * re2 = RE2.new("woo?", :max_mem => 1024)
332
+ * re2.max_mem #=> 1024
333
+ */
334
+ static VALUE
335
+ re2_max_mem(VALUE self)
336
+ {
337
+ re2_pattern *p;
338
+ Data_Get_Struct(self, re2_pattern, p);
339
+ return INT2FIX(p->pattern->options().max_mem());
340
+ }
341
+
342
+ /*
343
+ * call-seq:
344
+ * re2.literal? -> true or false
345
+ *
346
+ * Returns whether or not the regular expression +re2+
347
+ * was compiled with the literal option set to true.
348
+ *
349
+ * re2 = RE2.new("woo?", :literal => true)
350
+ * re2.literal? #=> true
351
+ */
352
+ static VALUE
353
+ re2_literal(VALUE self)
354
+ {
355
+ re2_pattern *p;
356
+ Data_Get_Struct(self, re2_pattern, p);
357
+ return BOOL2RUBY(p->pattern->options().literal());
358
+ }
359
+
360
+ /*
361
+ * call-seq:
362
+ * re2.never_nl? -> true or false
363
+ *
364
+ * Returns whether or not the regular expression +re2+
365
+ * was compiled with the never_nl option set to true.
366
+ *
367
+ * re2 = RE2.new("woo?", :never_nl => true)
368
+ * re2.never_nl? #=> true
369
+ */
370
+ static VALUE
371
+ re2_never_nl(VALUE self)
372
+ {
373
+ re2_pattern *p;
374
+ Data_Get_Struct(self, re2_pattern, p);
375
+ return BOOL2RUBY(p->pattern->options().never_nl());
376
+ }
377
+
378
+ /*
379
+ * call-seq:
380
+ * re2.case_sensitive? -> true or false
381
+ *
382
+ * Returns whether or not the regular expression +re2+
383
+ * was compiled with the case_sensitive option set to true.
384
+ *
385
+ * re2 = RE2.new("woo?", :case_sensitive => true)
386
+ * re2.case_sensitive? #=> true
387
+ */
388
+ static VALUE
389
+ re2_case_sensitive(VALUE self)
390
+ {
391
+ re2_pattern *p;
392
+ Data_Get_Struct(self, re2_pattern, p);
393
+ return BOOL2RUBY(p->pattern->options().case_sensitive());
394
+ }
395
+
396
+ /*
397
+ * call-seq:
398
+ * re2.case_insensitive? -> true or false
399
+ * re2.casefold? -> true or false
400
+ *
401
+ * Returns whether or not the regular expression +re2+
402
+ * was compiled with the case_sensitive option set to false.
403
+ *
404
+ * re2 = RE2.new("woo?", :case_sensitive => true)
405
+ * re2.case_insensitive? #=> false
406
+ */
407
+ static VALUE
408
+ re2_case_insensitive(VALUE self)
409
+ {
410
+ return BOOL2RUBY(re2_case_sensitive(self) != Qtrue);
411
+ }
412
+
413
+ /*
414
+ * call-seq:
415
+ * re2.perl_classes? -> true or false
416
+ *
417
+ * Returns whether or not the regular expression +re2+
418
+ * was compiled with the perl_classes option set to true.
419
+ *
420
+ * re2 = RE2.new("woo?", :perl_classes => true)
421
+ * re2.perl_classes? #=> true
422
+ */
423
+ static VALUE
424
+ re2_perl_classes(VALUE self)
425
+ {
426
+ re2_pattern *p;
427
+ Data_Get_Struct(self, re2_pattern, p);
428
+ return BOOL2RUBY(p->pattern->options().perl_classes());
429
+ }
430
+
431
+ /*
432
+ * call-seq:
433
+ * re2.word_boundary? -> true or false
434
+ *
435
+ * Returns whether or not the regular expression +re2+
436
+ * was compiled with the word_boundary option set to true.
437
+ *
438
+ * re2 = RE2.new("woo?", :word_boundary => true)
439
+ * re2.word_boundary? #=> true
440
+ */
441
+ static VALUE
442
+ re2_word_boundary(VALUE self)
443
+ {
444
+ re2_pattern *p;
445
+ Data_Get_Struct(self, re2_pattern, p);
446
+ return BOOL2RUBY(p->pattern->options().word_boundary());
447
+ }
448
+
449
+ /*
450
+ * call-seq:
451
+ * re2.one_line? -> true or false
452
+ *
453
+ * Returns whether or not the regular expression +re2+
454
+ * was compiled with the one_line option set to true.
455
+ *
456
+ * re2 = RE2.new("woo?", :one_line => true)
457
+ * re2.one_line? #=> true
458
+ */
459
+ static VALUE
460
+ re2_one_line(VALUE self)
461
+ {
462
+ re2_pattern *p;
463
+ Data_Get_Struct(self, re2_pattern, p);
464
+ return BOOL2RUBY(p->pattern->options().one_line());
465
+ }
466
+
467
+ /*
468
+ * call-seq:
469
+ * re2.error -> error_str
470
+ *
471
+ * If the RE2 could not be created properly, returns an
472
+ * error string.
473
+ */
474
+ static VALUE
475
+ re2_error(VALUE self)
476
+ {
477
+ re2_pattern *p;
478
+ Data_Get_Struct(self, re2_pattern, p);
479
+ return rb_str_new2(p->pattern->error().c_str());
480
+ }
481
+
482
+ /*
483
+ * call-seq:
484
+ * re2.error_arg -> error_str
485
+ *
486
+ * If the RE2 could not be created properly, returns
487
+ * the offending portion of the regexp.
488
+ */
489
+ static VALUE
490
+ re2_error_arg(VALUE self)
491
+ {
492
+ re2_pattern *p;
493
+ Data_Get_Struct(self, re2_pattern, p);
494
+ return rb_str_new2(p->pattern->error_arg().c_str());
495
+ }
496
+
497
+ /*
498
+ * call-seq:
499
+ * re2.program_size -> size
500
+ *
501
+ * Returns the program size, a very approximate measure
502
+ * of a regexp's "cost". Larger numbers are more expensive
503
+ * than smaller numbers.
504
+ */
505
+ static VALUE
506
+ re2_program_size(VALUE self)
507
+ {
508
+ re2_pattern *p;
509
+ Data_Get_Struct(self, re2_pattern, p);
510
+ return INT2FIX(p->pattern->ProgramSize());
511
+ }
512
+
513
+ /*
514
+ * call-seq:
515
+ * re2.options -> options_hash
516
+ *
517
+ * Returns a hash of the options currently set for
518
+ * +re2+.
519
+ */
520
+ static VALUE
521
+ re2_options(VALUE self)
522
+ {
523
+ VALUE options;
524
+ re2_pattern *p;
525
+
526
+ Data_Get_Struct(self, re2_pattern, p);
527
+ options = rb_hash_new();
528
+
529
+ rb_hash_aset(options, ID2SYM(id_utf8),
530
+ BOOL2RUBY(p->pattern->options().utf8()));
531
+
532
+ rb_hash_aset(options, ID2SYM(id_posix_syntax),
533
+ BOOL2RUBY(p->pattern->options().posix_syntax()));
534
+
535
+ rb_hash_aset(options, ID2SYM(id_longest_match),
536
+ BOOL2RUBY(p->pattern->options().longest_match()));
537
+
538
+ rb_hash_aset(options, ID2SYM(id_log_errors),
539
+ BOOL2RUBY(p->pattern->options().log_errors()));
540
+
541
+ rb_hash_aset(options, ID2SYM(id_max_mem),
542
+ INT2FIX(p->pattern->options().max_mem()));
543
+
544
+ rb_hash_aset(options, ID2SYM(id_literal),
545
+ BOOL2RUBY(p->pattern->options().literal()));
546
+
547
+ rb_hash_aset(options, ID2SYM(id_never_nl),
548
+ BOOL2RUBY(p->pattern->options().never_nl()));
549
+
550
+ rb_hash_aset(options, ID2SYM(id_case_sensitive),
551
+ BOOL2RUBY(p->pattern->options().case_sensitive()));
552
+
553
+ rb_hash_aset(options, ID2SYM(id_perl_classes),
554
+ BOOL2RUBY(p->pattern->options().perl_classes()));
555
+
556
+ rb_hash_aset(options, ID2SYM(id_word_boundary),
557
+ BOOL2RUBY(p->pattern->options().word_boundary()));
558
+
559
+ rb_hash_aset(options, ID2SYM(id_one_line),
560
+ BOOL2RUBY(p->pattern->options().one_line()));
561
+
562
+ // This is a read-only hash after all...
563
+ OBJ_FREEZE(options);
564
+
565
+ return options;
566
+ }
567
+
568
+ /*
569
+ * call-seq:
570
+ * re2.number_of_capturing_groups -> int
571
+ *
572
+ * Returns the number of capturing subpatterns, or -1 if the regexp
573
+ * wasn't valid on construction. The overall match ($0) does not
574
+ * count: if the regexp is "(a)(b)", returns 2.
575
+ */
576
+ static VALUE
577
+ re2_number_of_capturing_groups(VALUE self)
578
+ {
579
+ re2_pattern *p;
580
+
581
+ Data_Get_Struct(self, re2_pattern, p);
582
+ return INT2FIX(p->pattern->NumberOfCapturingGroups());
583
+ }
584
+
585
+ /*
586
+ * call-seq:
587
+ * re2.match(text) -> [match, match]
588
+ * re2.match(text, 0) -> true or false
589
+ * re2.match(text, num_of_matches) -> [match, match]
590
+ *
591
+ * Looks for the pattern in +re2+ in +text+; when specified
592
+ * without a second argument, will return an array of the matching
593
+ * pattern and all subpatterns. If the second argument is 0, a
594
+ * simple true or false will be returned to indicate a successful
595
+ * match. If the second argument is any integer greater than 0,
596
+ * that number of matches will be returned (padded with nils if
597
+ * there are insufficient matches).
598
+ *
599
+ * r = RE2.new('w(o)(o)')
600
+ * r.match('woo') #=> ["woo", "o", "o"]
601
+ * r.match('woo', 0) #=> true
602
+ * r.match('bob', 0) #=> false
603
+ * r.match('woo', 1) #=> ["woo", "o"]
604
+ */
605
+ static VALUE
606
+ re2_match(int argc, VALUE *argv, VALUE self)
607
+ {
608
+ int n;
609
+ bool matched;
610
+ re2_pattern *p;
611
+ VALUE text, number_of_matches, matches;
612
+ re2::StringPiece *string_matches, *text_as_string_piece;
613
+
614
+ rb_scan_args(argc, argv, "11", &text, &number_of_matches);
615
+
616
+ Data_Get_Struct(self, re2_pattern, p);
617
+
618
+ if (RTEST(number_of_matches)) {
619
+ n = NUM2INT(number_of_matches);
620
+ } else {
621
+ n = p->pattern->NumberOfCapturingGroups();
622
+ }
623
+
624
+ text_as_string_piece = new re2::StringPiece(StringValuePtr(text));
625
+
626
+ if (n == 0) {
627
+ return BOOL2RUBY(p->pattern->Match(*text_as_string_piece, 0, RE2::UNANCHORED, 0, 0));
628
+ } else {
629
+
630
+ /* Because match returns the whole match as well. */
631
+ n += 1;
632
+
633
+ string_matches = new re2::StringPiece[n];
634
+
635
+ matched = p->pattern->Match(*text_as_string_piece, 0, RE2::UNANCHORED, string_matches, n);
636
+
637
+ if (matched) {
638
+ matches = rb_ary_new();
639
+
640
+ for (int i = 0; i < n; i++) {
641
+ if (!string_matches[i].empty()) {
642
+ rb_ary_push(matches, rb_str_new2(string_matches[i].as_string().c_str()));
643
+ } else {
644
+ rb_ary_push(matches, Qnil);
645
+ }
646
+ }
647
+
648
+ return matches;
649
+ } else {
650
+ return Qnil;
651
+ }
652
+ }
653
+ }
654
+
655
+ /*
656
+ * call-seq:
657
+ * re2.match?(text) -> true or false
658
+ * re2 =~ text -> true or false
659
+ *
660
+ * Returns true or false to indicate a successful match.
661
+ * Equivalent to +re2.match(text, 0)+.
662
+ */
663
+ static VALUE
664
+ re2_match_query(VALUE self, VALUE text)
665
+ {
666
+ VALUE argv[2];
667
+ argv[0] = text;
668
+ argv[1] = INT2FIX(0);
669
+
670
+ return re2_match(2, argv, self);
671
+ }
672
+
673
+ /*
674
+ * call-seq:
675
+ * re2 !~ text -> true or false
676
+ *
677
+ * Returns true or false to indicate an unsuccessful match.
678
+ * Equivalent to +!re2.match(text, 0)+.
679
+ */
680
+ static VALUE
681
+ re2_bang_tilde(VALUE self, VALUE text)
682
+ {
683
+ return BOOL2RUBY(re2_match_query(self, text) != Qtrue);
684
+ }
685
+
686
+ /*
687
+ * call-seq:
688
+ * RE2::FullMatch(text, re) -> true or false
689
+ *
690
+ * Returns whether or not a full match for +re2+ was
691
+ * found in text.
692
+ *
693
+ * RE2::FullMatch("woo", "wo+") #=> true
694
+ * RE2::FullMatch("woo", "a") #=> false
695
+ * re2 = RE2.new("woo")
696
+ * RE2::FullMatch("woo", re2) #=> true
697
+ */
698
+ static VALUE
699
+ re2_FullMatch(VALUE self, VALUE text, VALUE re)
700
+ {
701
+ UNUSED(self);
702
+ bool result;
703
+ re2_pattern *p;
704
+
705
+ if (rb_obj_is_kind_of(re, re2_cRE2)) {
706
+ Data_Get_Struct(re, re2_pattern, p);
707
+ result = RE2::FullMatch(StringValuePtr(text), *p->pattern);
708
+ } else {
709
+ result = RE2::FullMatch(StringValuePtr(text), StringValuePtr(re));
710
+ }
711
+
712
+ return BOOL2RUBY(result);
713
+ }
714
+
715
+ /*
716
+ * call-seq:
717
+ * RE2::FullMatchN(text, re) -> array of matches
718
+ *
719
+ * Returns an array of successful matches as defined in
720
+ * +re+ for +text+.
721
+ *
722
+ * RE2::FullMatchN("woo", "w(oo)") #=> ["oo"]
723
+ */
724
+ static VALUE
725
+ re2_FullMatchN(VALUE self, VALUE text, VALUE re)
726
+ {
727
+ UNUSED(self);
728
+ int n;
729
+ bool matched;
730
+ re2_pattern *p;
731
+ VALUE matches;
732
+ RE2 *compiled_pattern;
733
+ RE2::Arg *argv;
734
+ const RE2::Arg **args;
735
+ std::string *string_matches;
736
+
737
+ if (rb_obj_is_kind_of(re, re2_cRE2)) {
738
+ Data_Get_Struct(re, re2_pattern, p);
739
+ compiled_pattern = p->pattern;
740
+ } else {
741
+ compiled_pattern = new RE2(StringValuePtr(re));
742
+ }
743
+
744
+ n = compiled_pattern->NumberOfCapturingGroups();
745
+
746
+ argv = new RE2::Arg[n];
747
+ args = new const RE2::Arg*[n];
748
+ string_matches = new std::string[n];
749
+
750
+ for (int i = 0; i < n; i++) {
751
+ args[i] = &argv[i];
752
+ argv[i] = &string_matches[i];
753
+ }
754
+
755
+ matched = RE2::FullMatchN(StringValuePtr(text), *compiled_pattern, args, n);
756
+
757
+ if (matched) {
758
+ matches = rb_ary_new();
759
+
760
+ for (int i = 0; i < n; i++) {
761
+ if (!string_matches[i].empty()) {
762
+ rb_ary_push(matches, rb_str_new2(string_matches[i].c_str()));
763
+ } else {
764
+ rb_ary_push(matches, Qnil);
765
+ }
766
+ }
767
+
768
+ return matches;
769
+ } else {
770
+ return Qnil;
771
+ }
772
+ }
773
+
774
+ /*
775
+ * call-seq:
776
+ * RE2::PartialMatchN(text, re) -> array of matches
777
+ *
778
+ * Returns an array of successful matches as defined in
779
+ * +re+ for +text+.
780
+ *
781
+ * RE2::PartialMatchN("woo", "w(oo)") #=> ["oo"]
782
+ */
783
+ static VALUE
784
+ re2_PartialMatchN(VALUE self, VALUE text, VALUE re)
785
+ {
786
+ UNUSED(self);
787
+ int n;
788
+ bool matched;
789
+ re2_pattern *p;
790
+ VALUE matches;
791
+ RE2 *compiled_pattern;
792
+ RE2::Arg *argv;
793
+ const RE2::Arg **args;
794
+ std::string *string_matches;
795
+
796
+ if (rb_obj_is_kind_of(re, re2_cRE2)) {
797
+ Data_Get_Struct(re, re2_pattern, p);
798
+ compiled_pattern = p->pattern;
799
+ } else {
800
+ compiled_pattern = new RE2(StringValuePtr(re));
801
+ }
802
+
803
+ n = compiled_pattern->NumberOfCapturingGroups();
804
+
805
+ argv = new RE2::Arg[n];
806
+ args = new const RE2::Arg*[n];
807
+ string_matches = new std::string[n];
808
+
809
+ for (int i = 0; i < n; i++) {
810
+ args[i] = &argv[i];
811
+ argv[i] = &string_matches[i];
812
+ }
813
+
814
+ matched = RE2::PartialMatchN(StringValuePtr(text), *compiled_pattern, args, n);
815
+
816
+ if (matched) {
817
+ matches = rb_ary_new();
818
+
819
+ for (int i = 0; i < n; i++) {
820
+ if (!string_matches[i].empty()) {
821
+ rb_ary_push(matches, rb_str_new2(string_matches[i].c_str()));
822
+ } else {
823
+ rb_ary_push(matches, Qnil);
824
+ }
825
+ }
826
+
827
+ return matches;
828
+ } else {
829
+ return Qnil;
830
+ }
831
+ }
832
+
833
+ /*
834
+ * call-seq:
835
+ * RE2::PartialMatch(text, re) -> true or false
836
+ *
837
+ * Returns whether or not a partial match for +re2+ was
838
+ * found in text.
839
+ *
840
+ * RE2::PartialMatch("woo", "o+") #=> true
841
+ * RE2::PartialMatch("woo", "a") #=> false
842
+ * re2 = RE2.new("oo?")
843
+ * RE2::PartialMatch("woo", re2) #=> true
844
+ */
845
+ static VALUE
846
+ re2_PartialMatch(VALUE self, VALUE text, VALUE re)
847
+ {
848
+ UNUSED(self);
849
+ bool result;
850
+ re2_pattern *p;
851
+
852
+ if (rb_obj_is_kind_of(re, re2_cRE2)) {
853
+ Data_Get_Struct(re, re2_pattern, p);
854
+ result = RE2::PartialMatch(StringValuePtr(text), *p->pattern);
855
+ } else {
856
+ result = RE2::PartialMatch(StringValuePtr(text), StringValuePtr(re));
857
+ }
858
+
859
+ return BOOL2RUBY(result);
860
+ }
861
+
862
+ /*
863
+ * call-seq:
864
+ * RE2::Replace(str, pattern, rewrite) -> str
865
+ *
866
+ * Replaces the first occurrence +pattern+ in +str+ with
867
+ * +rewrite+ <i>in place</i>.
868
+ *
869
+ * RE2::Replace("hello there", "hello", "howdy") #=> "howdy there"
870
+ * re2 = RE2.new("hel+o")
871
+ * RE2::Replace("hello there", re2, "yo") #=> "yo there"
872
+ * text = "Good morning"
873
+ * RE2::Replace(text, "morn", "even") #=> "Good evening"
874
+ * text #=> "Good evening"
875
+ */
876
+ static VALUE
877
+ re2_Replace(VALUE self, VALUE str, VALUE pattern, VALUE rewrite)
878
+ {
879
+ UNUSED(self);
880
+ VALUE repl;
881
+ re2_pattern *p;
882
+
883
+ // Convert all the inputs to be pumped into RE2::Replace.
884
+ std::string str_as_string(StringValuePtr(str));
885
+ re2::StringPiece rewrite_as_string_piece(StringValuePtr(rewrite));
886
+
887
+ // Do the replacement.
888
+ if (rb_obj_is_kind_of(pattern, re2_cRE2)) {
889
+ Data_Get_Struct(pattern, re2_pattern, p);
890
+ RE2::Replace(&str_as_string, *p->pattern, rewrite_as_string_piece);
891
+ } else {
892
+ RE2::Replace(&str_as_string, StringValuePtr(pattern), rewrite_as_string_piece);
893
+ }
894
+
895
+ // Save the replacement as a VALUE.
896
+ repl = rb_str_new(str_as_string.c_str(), str_as_string.length());
897
+
898
+ // Replace the original string with the replacement.
899
+ rb_str_update(str, 0, RSTRING_LEN(str), repl);
900
+
901
+ return str;
902
+ }
903
+
904
+ /*
905
+ * call-seq:
906
+ * RE2::GlobalReplace(str, pattern, rewrite) -> str
907
+ *
908
+ * Replaces every occurrence of +pattern+ in +str+ with
909
+ * +rewrite+ <i>in place</i>.
910
+ *
911
+ * RE2::GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
912
+ * re2 = RE2.new("oo?")
913
+ * RE2::GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
914
+ * text = "Good morning"
915
+ * RE2::GlobalReplace(text, "o", "ee") #=> "Geeeed meerning"
916
+ * text #=> "Geeeed meerning"
917
+ */
918
+ static VALUE
919
+ re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern, VALUE rewrite)
920
+ {
921
+ UNUSED(self);
922
+
923
+ // Convert all the inputs to be pumped into RE2::GlobalReplace.
924
+ re2_pattern *p;
925
+ std::string str_as_string(StringValuePtr(str));
926
+ re2::StringPiece rewrite_as_string_piece(StringValuePtr(rewrite));
927
+ VALUE repl;
928
+
929
+ // Do the replacement.
930
+ if (rb_obj_is_kind_of(pattern, re2_cRE2)) {
931
+ Data_Get_Struct(pattern, re2_pattern, p);
932
+ RE2::GlobalReplace(&str_as_string, *p->pattern, rewrite_as_string_piece);
933
+ } else {
934
+ RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern), rewrite_as_string_piece);
935
+ }
936
+
937
+ // Save the replacement as a VALUE.
938
+ repl = rb_str_new(str_as_string.c_str(), str_as_string.length());
939
+
940
+ // Replace the original string with the replacement.
941
+ rb_str_update(str, 0, RSTRING_LEN(str), repl);
942
+
943
+ return str;
944
+ }
945
+
946
+ /*
947
+ * call-seq:
948
+ * RE2::QuoteMeta(str) -> str
949
+ * RE2.escape(str) -> str
950
+ * RE2.quote(str) -> str
951
+ *
952
+ * Returns a version of str with all potentially meaningful regexp
953
+ * characters escaped. The returned string, used as a regular
954
+ * expression, will exactly match the original string.
955
+ *
956
+ * RE2::QuoteMeta("1.5-2.0?") #=> "1\.5\-2\.0\?"
957
+ */
958
+ static VALUE
959
+ re2_QuoteMeta(VALUE self, VALUE unquoted)
960
+ {
961
+ UNUSED(self);
962
+ re2::StringPiece unquoted_as_string_piece(StringValuePtr(unquoted));
963
+ return rb_str_new2(RE2::QuoteMeta(unquoted_as_string_piece).c_str());
964
+ }
965
+
966
+ void
967
+ Init_re2()
968
+ {
969
+ re2_cRE2 = rb_define_class("RE2", rb_cObject);
970
+ rb_define_alloc_func(re2_cRE2, (VALUE (*)(VALUE))re2_allocate);
971
+ rb_define_method(re2_cRE2, "initialize", (VALUE (*)(...))re2_initialize, -1);
972
+ rb_define_method(re2_cRE2, "ok?", (VALUE (*)(...))re2_ok, 0);
973
+ rb_define_method(re2_cRE2, "error", (VALUE (*)(...))re2_error, 0);
974
+ rb_define_method(re2_cRE2, "error_arg", (VALUE (*)(...))re2_error_arg, 0);
975
+ rb_define_method(re2_cRE2, "program_size", (VALUE (*)(...))re2_program_size, 0);
976
+ rb_define_method(re2_cRE2, "options", (VALUE (*)(...))re2_options, 0);
977
+ rb_define_method(re2_cRE2, "number_of_capturing_groups", (VALUE (*)(...))re2_number_of_capturing_groups, 0);
978
+ rb_define_method(re2_cRE2, "match", (VALUE (*)(...))re2_match, -1);
979
+ rb_define_method(re2_cRE2, "match?", (VALUE (*)(...))re2_match_query, 1);
980
+ rb_define_method(re2_cRE2, "=~", (VALUE (*)(...))re2_match_query, 1);
981
+ rb_define_method(re2_cRE2, "===", (VALUE (*)(...))re2_match_query, 1);
982
+ rb_define_method(re2_cRE2, "!~", (VALUE (*)(...))re2_bang_tilde, 1);
983
+ rb_define_method(re2_cRE2, "to_s", (VALUE (*)(...))re2_to_s, 0);
984
+ rb_define_method(re2_cRE2, "to_str", (VALUE (*)(...))re2_to_s, 0);
985
+ rb_define_method(re2_cRE2, "pattern", (VALUE (*)(...))re2_to_s, 0);
986
+ rb_define_method(re2_cRE2, "source", (VALUE (*)(...))re2_to_s, 0);
987
+ rb_define_method(re2_cRE2, "inspect", (VALUE (*)(...))re2_inspect, 0);
988
+ rb_define_method(re2_cRE2, "utf8?", (VALUE (*)(...))re2_utf8, 0);
989
+ rb_define_method(re2_cRE2, "posix_syntax?", (VALUE (*)(...))re2_posix_syntax, 0);
990
+ rb_define_method(re2_cRE2, "longest_match?", (VALUE (*)(...))re2_longest_match, 0);
991
+ rb_define_method(re2_cRE2, "log_errors?", (VALUE (*)(...))re2_log_errors, 0);
992
+ rb_define_method(re2_cRE2, "max_mem", (VALUE (*)(...))re2_max_mem, 0);
993
+ rb_define_method(re2_cRE2, "literal?", (VALUE (*)(...))re2_literal, 0);
994
+ rb_define_method(re2_cRE2, "never_nl?", (VALUE (*)(...))re2_never_nl, 0);
995
+ rb_define_method(re2_cRE2, "case_sensitive?", (VALUE (*)(...))re2_case_sensitive, 0);
996
+ rb_define_method(re2_cRE2, "case_insensitive?", (VALUE (*)(...))re2_case_insensitive, 0);
997
+ rb_define_method(re2_cRE2, "casefold?", (VALUE (*)(...))re2_case_insensitive, 0);
998
+ rb_define_method(re2_cRE2, "perl_classes?", (VALUE (*)(...))re2_perl_classes, 0);
999
+ rb_define_method(re2_cRE2, "word_boundary?", (VALUE (*)(...))re2_word_boundary, 0);
1000
+ rb_define_method(re2_cRE2, "one_line?", (VALUE (*)(...))re2_one_line, 0);
1001
+ rb_define_singleton_method(re2_cRE2, "FullMatch", (VALUE (*)(...))re2_FullMatch, 2);
1002
+ rb_define_singleton_method(re2_cRE2, "FullMatchN", (VALUE (*)(...))re2_FullMatchN, 2);
1003
+ rb_define_singleton_method(re2_cRE2, "PartialMatch", (VALUE (*)(...))re2_PartialMatch, 2);
1004
+ rb_define_singleton_method(re2_cRE2, "PartialMatchN", (VALUE (*)(...))re2_PartialMatchN, 2);
1005
+ rb_define_singleton_method(re2_cRE2, "Replace", (VALUE (*)(...))re2_Replace, 3);
1006
+ rb_define_singleton_method(re2_cRE2, "GlobalReplace", (VALUE (*)(...))re2_GlobalReplace, 3);
1007
+ rb_define_singleton_method(re2_cRE2, "QuoteMeta", (VALUE (*)(...))re2_QuoteMeta, 1);
1008
+ rb_define_singleton_method(re2_cRE2, "escape", (VALUE (*)(...))re2_QuoteMeta, 1);
1009
+ rb_define_singleton_method(re2_cRE2, "quote", (VALUE (*)(...))re2_QuoteMeta, 1);
1010
+ rb_define_singleton_method(re2_cRE2, "compile", (VALUE (*)(...))rb_class_new_instance, -1);
1011
+ rb_define_global_function("RE2", (VALUE (*)(...))re2_re2, -1);
1012
+
1013
+ /* Create the symbols used in options. */
1014
+ id_utf8 = rb_intern("utf8");
1015
+ id_posix_syntax = rb_intern("posix_syntax");
1016
+ id_longest_match = rb_intern("longest_match");
1017
+ id_log_errors = rb_intern("log_errors");
1018
+ id_max_mem = rb_intern("max_mem");
1019
+ id_literal = rb_intern("literal");
1020
+ id_never_nl = rb_intern("never_nl");
1021
+ id_case_sensitive = rb_intern("case_sensitive");
1022
+ id_perl_classes = rb_intern("perl_classes");
1023
+ id_word_boundary = rb_intern("word_boundary");
1024
+ id_one_line = rb_intern("one_line");
1025
+ }
1026
+ }