re2 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE.txt +28 -0
- data/README.md +70 -0
- data/Rakefile +19 -0
- data/ext/re2/extconf.rb +18 -0
- data/ext/re2/re2.cc +1026 -0
- data/test/re2_test.rb +228 -0
- metadata +72 -0
data/LICENSE.txt
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
Copyright (c) 2010, Paul Mucur.
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice, this
|
8
|
+
list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* Neither the name of Paul Mucur, nor the names of its contributors may be
|
15
|
+
used to endorse or promote products derived from this software without
|
16
|
+
specific prior written permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
19
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
20
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
22
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
24
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
25
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
26
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
|
data/README.md
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
re2
|
2
|
+
===
|
3
|
+
|
4
|
+
A Ruby binding to [re2][], an "efficient, principled regular expression library".
|
5
|
+
|
6
|
+
Installation
|
7
|
+
------------
|
8
|
+
|
9
|
+
You will need [re2][] installed in its default location of /usr/local as well as a C++ compiler such as [gcc][] (on Debian and Ubuntu, this is provided by the [build-essential][] package).
|
10
|
+
|
11
|
+
If you are using a packaged Ruby distribution, make sure you also have the Ruby header files installed such as those provided by the [ruby-dev][] package on Debian and Ubuntu.
|
12
|
+
|
13
|
+
You can then install the library via RubyGems: `gem install re2`
|
14
|
+
|
15
|
+
Usage
|
16
|
+
-----
|
17
|
+
|
18
|
+
You can use re2 as a mostly drop-in replacement for Ruby's own [Regexp][] class:
|
19
|
+
|
20
|
+
$ irb -rubygems
|
21
|
+
> require 're2'
|
22
|
+
> r = RE2.compile('w(\d)(\d+)')
|
23
|
+
=> /w(\d)(\d+)/
|
24
|
+
> r.match("w1234")
|
25
|
+
=> ["w1234", "1", "234"]
|
26
|
+
> r =~ "w1234"
|
27
|
+
=> true
|
28
|
+
> r !~ "bob"
|
29
|
+
=> true
|
30
|
+
> r.match("bob")
|
31
|
+
=> nil
|
32
|
+
|
33
|
+
Features
|
34
|
+
--------
|
35
|
+
|
36
|
+
* Pre-compiling regular expressions with [`RE2.new(re)`](http://code.google.com/p/re2/source/browse/re2/re2.h#96), `RE2.compile(re)` or `RE2(re)` (including specifying options, e.g. `RE2.new("pattern", :case_sensitive => false)`)
|
37
|
+
|
38
|
+
* Extracting matches with `re2.match(text)` (and an exact number of matches with `re2.match(text, number_of_matches)` such as `re2.match("123-234", 2)`)
|
39
|
+
|
40
|
+
* Checking for matches with `re2 =~ text`, `re2 === text` (for use in `case` statements) and `re2 !~ text`
|
41
|
+
|
42
|
+
* Checking regular expression compilation with `re2.ok?`, `re2.error` and `re2.error_arg`
|
43
|
+
|
44
|
+
* Checking regular expression "cost" with `re2.program_size`
|
45
|
+
|
46
|
+
* Checking the options for an expression with `re2.options` or individually with `re2.case_sensitive?`
|
47
|
+
|
48
|
+
* Performing full matches with [`RE2::FullMatch(text, re)`](http://code.google.com/p/re2/source/browse/re2/re2.h#30)
|
49
|
+
|
50
|
+
* Performing partial matches with [`RE2::PartialMatch(text, re)`](http://code.google.com/p/re2/source/browse/re2/re2.h#82)
|
51
|
+
|
52
|
+
* Performing in-place replacement with [`RE2::Replace(str, pattern, replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#335)
|
53
|
+
|
54
|
+
* Performing in-place global replacement with [`RE2::GlobalReplace(str, pattern, replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#352)
|
55
|
+
|
56
|
+
* Escaping regular expressions with [`RE2::QuoteMeta(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377), `RE2.escape(unquoted)` or `RE2.quote(unquoted)`
|
57
|
+
|
58
|
+
re2.cc should be well-documented so feel free to consult this file to see what can currently be used.
|
59
|
+
|
60
|
+
Contact
|
61
|
+
-------
|
62
|
+
|
63
|
+
All feedback should go to the mailing list: ruby.re2@librelist.com
|
64
|
+
|
65
|
+
[re2]: http://code.google.com/p/re2/
|
66
|
+
[gcc]: http://gcc.gnu.org/
|
67
|
+
[ruby-dev]: http://packages.debian.org/ruby-dev
|
68
|
+
[build-essential]: http://packages.debian.org/build-essential
|
69
|
+
[Regexp]: http://ruby-doc.org/core/classes/Regexp.html
|
70
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# Rake libraries needed to build the extension and run the tests. They are
# loaded directly first; if that fails, RubyGems is loaded and the requires
# are attempted once more.
rake_libs = ['rake/extensiontask', 'rake/testtask']

begin
  rake_libs.each { |lib| require lib }
rescue LoadError
  require 'rubygems'
  rake_libs.each { |lib| require lib }
end

# Defines the tasks for the "re2" extension (including :compile, used below).
Rake::ExtensionTask.new('re2')

# Run every test/*_test.rb file verbosely.
Rake::TestTask.new do |test_task|
  test_task.test_files = FileList["test/*_test.rb"]
  test_task.verbose = true
end

# The tests exercise the compiled extension, so build it first.
task :test => :compile
task :default => :test
|
19
|
+
|
data/ext/re2/extconf.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# re2 (http://github.com/mudge/re2)
|
2
|
+
# Ruby bindings to re2, an "efficient, principled regular expression library"
|
3
|
+
#
|
4
|
+
# Copyright (c) 2010, Paul Mucur (http://mucur.name)
|
5
|
+
# Released under the BSD Licence, please see LICENSE.txt
|
6
|
+
|
7
|
+
require 'mkmf'

# Register the re2 include/lib search paths, defaulting to /usr/local; the
# location can be overridden with --with-re2-dir. The return value is not
# needed, so it is no longer captured in unused locals.
dir_config("re2", "/usr/local/include", "/usr/local/lib")

$CFLAGS << " -Wall -Wextra -funroll-loops"

have_library("stdc++")

# Without libre2 there is nothing to build: stop with an explanatory message.
unless have_library("re2")
  abort "You must have re2 installed and specified with --with-re2-dir, please see http://code.google.com/p/re2/wiki/Install"
end

create_makefile("re2")
|
data/ext/re2/re2.cc
ADDED
@@ -0,0 +1,1026 @@
|
|
1
|
+
/*
|
2
|
+
* re2 (http://github.com/mudge/re2)
|
3
|
+
* Ruby bindings to re2, an "efficient, principled regular expression library"
|
4
|
+
*
|
5
|
+
* Copyright (c) 2010, Paul Mucur (http://mucur.name)
|
6
|
+
* Released under the BSD Licence, please see LICENSE.txt
|
7
|
+
*/
|
8
|
+
|
9
|
+
#include <re2/re2.h>
|
10
|
+
|
11
|
+
extern "C" {
|
12
|
+
|
13
|
+
#include <ruby.h>
|
14
|
+
|
15
|
+
/*
 * Convert a C/C++ truth value into Ruby's Qtrue/Qfalse. The argument is
 * parenthesised so that compound expressions (e.g. `a ? b : c`) are
 * evaluated as a whole before being tested.
 */
#define BOOL2RUBY(v) ((v) ? Qtrue : Qfalse)

/* Mark a parameter as deliberately unused (the build uses -Wall -Wextra). */
#define UNUSED(x) ((void)(x))
|
17
|
+
|
18
|
+
#if !defined(RSTRING_LEN)
|
19
|
+
# define RSTRING_LEN(x) (RSTRING(x)->len)
|
20
|
+
#endif
|
21
|
+
|
22
|
+
typedef struct _re2p {
|
23
|
+
RE2 *pattern;
|
24
|
+
} re2_pattern;
|
25
|
+
|
26
|
+
VALUE re2_cRE2;
|
27
|
+
|
28
|
+
/* Symbols used in RE2 options. */
|
29
|
+
static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
30
|
+
id_max_mem, id_literal, id_never_nl, id_case_sensitive,
|
31
|
+
id_perl_classes, id_word_boundary, id_one_line;
|
32
|
+
|
33
|
+
void
|
34
|
+
re2_free(re2_pattern* self)
|
35
|
+
{
|
36
|
+
free(self);
|
37
|
+
}
|
38
|
+
|
39
|
+
/*
 * Allocation function for RE2 objects: creates an empty wrapper (no
 * compiled pattern yet) and hands it to Ruby with re2_free as its free
 * function.
 *
 * Raises NoMemoryError via rb_memerror() if the wrapper cannot be
 * allocated, instead of dereferencing a NULL pointer.
 */
static VALUE
re2_allocate(VALUE klass)
{
  re2_pattern *wrapper = (re2_pattern*)malloc(sizeof(*wrapper));

  if (wrapper == NULL) {
    rb_memerror();
  }

  wrapper->pattern = NULL;

  return Data_Wrap_Struct(klass, 0, re2_free, wrapper);
}
|
46
|
+
|
47
|
+
/*
|
48
|
+
* call-seq:
|
49
|
+
* RE2(pattern) -> re2
|
50
|
+
* RE2(pattern, options) -> re2
|
51
|
+
*
|
52
|
+
* Returns a new RE2 object with a compiled version of
|
53
|
+
* +pattern+ stored inside. Equivalent to +RE2.new+.
|
54
|
+
*/
|
55
|
+
static VALUE
|
56
|
+
re2_re2(int argc, VALUE *argv, VALUE self)
|
57
|
+
{
|
58
|
+
UNUSED(self);
|
59
|
+
return rb_class_new_instance(argc, argv, re2_cRE2);
|
60
|
+
}
|
61
|
+
|
62
|
+
/*
|
63
|
+
* call-seq:
|
64
|
+
* RE2.new(pattern) -> re2
|
65
|
+
* RE2.new(pattern, options) -> re2
|
66
|
+
* RE2.compile(pattern) -> re2
|
67
|
+
* RE2.compile(pattern, options) -> re2
|
68
|
+
*
|
69
|
+
* Returns a new RE2 object with a compiled version of
|
70
|
+
* +pattern+ stored inside.
|
71
|
+
*
|
72
|
+
* Options can be a hash with the following keys:
|
73
|
+
*
|
74
|
+
* :utf8 - text and pattern are UTF-8; otherwise
|
75
|
+
* Latin-1 (default true)
|
76
|
+
*
|
77
|
+
* :posix_syntax - restrict regexps to POSIX egrep syntax
|
78
|
+
* (default false)
|
79
|
+
*
|
80
|
+
* :longest_match - search for longest match, not first match
|
81
|
+
* (default false)
|
82
|
+
*
|
83
|
+
* :log_errors - log syntax and execution errors to ERROR
|
84
|
+
* (default true)
|
85
|
+
*
|
86
|
+
* :max_mem - approx. max memory footprint of RE2
|
87
|
+
*
|
88
|
+
* :literal - interpret string as literal, not regexp
|
89
|
+
* (default false)
|
90
|
+
*
|
91
|
+
* :never_nl - never match \n, even if it is in regexp
|
92
|
+
* (default false)
|
93
|
+
*
|
94
|
+
* :case_sensitive - match is case-sensitive (regexp can override
|
95
|
+
* with (?i) unless in posix_syntax mode)
|
96
|
+
* (default true)
|
97
|
+
*
|
98
|
+
* :perl_classes - allow Perl's \d \s \w \D \S \W when in
|
99
|
+
* posix_syntax mode (default false)
|
100
|
+
*
|
101
|
+
* :word_boundary - allow \b \B (word boundary and not) when
|
102
|
+
* in posix_syntax mode (default false)
|
103
|
+
*
|
104
|
+
* :one_line - ^ and $ only match beginning and end of text
|
105
|
+
* when in posix_syntax mode (default false)
|
106
|
+
*/
|
107
|
+
static VALUE
|
108
|
+
re2_initialize(int argc, VALUE *argv, VALUE self)
|
109
|
+
{
|
110
|
+
VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors,
|
111
|
+
max_mem, literal, never_nl, case_sensitive, perl_classes,
|
112
|
+
word_boundary, one_line;
|
113
|
+
re2_pattern *p;
|
114
|
+
RE2::Options *re2_options;
|
115
|
+
|
116
|
+
rb_scan_args(argc, argv, "11", &pattern, &options);
|
117
|
+
Data_Get_Struct(self, re2_pattern, p);
|
118
|
+
|
119
|
+
if (RTEST(options)) {
|
120
|
+
if (TYPE(options) != T_HASH) {
|
121
|
+
rb_raise(rb_eArgError, "options should be a hash");
|
122
|
+
}
|
123
|
+
|
124
|
+
re2_options = new RE2::Options();
|
125
|
+
|
126
|
+
utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
|
127
|
+
if (!NIL_P(utf8)) {
|
128
|
+
re2_options->set_utf8(RTEST(utf8));
|
129
|
+
}
|
130
|
+
|
131
|
+
posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
|
132
|
+
if (!NIL_P(posix_syntax)) {
|
133
|
+
re2_options->set_posix_syntax(RTEST(posix_syntax));
|
134
|
+
}
|
135
|
+
|
136
|
+
longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
|
137
|
+
if (!NIL_P(longest_match)) {
|
138
|
+
re2_options->set_longest_match(RTEST(longest_match));
|
139
|
+
}
|
140
|
+
|
141
|
+
log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
|
142
|
+
if (!NIL_P(log_errors)) {
|
143
|
+
re2_options->set_log_errors(RTEST(log_errors));
|
144
|
+
}
|
145
|
+
|
146
|
+
max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
|
147
|
+
if (!NIL_P(max_mem)) {
|
148
|
+
re2_options->set_max_mem(NUM2INT(max_mem));
|
149
|
+
}
|
150
|
+
|
151
|
+
literal = rb_hash_aref(options, ID2SYM(id_literal));
|
152
|
+
if (!NIL_P(literal)) {
|
153
|
+
re2_options->set_literal(RTEST(literal));
|
154
|
+
}
|
155
|
+
|
156
|
+
never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
|
157
|
+
if (!NIL_P(never_nl)) {
|
158
|
+
re2_options->set_never_nl(RTEST(never_nl));
|
159
|
+
}
|
160
|
+
|
161
|
+
case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
|
162
|
+
if (!NIL_P(case_sensitive)) {
|
163
|
+
re2_options->set_case_sensitive(RTEST(case_sensitive));
|
164
|
+
}
|
165
|
+
|
166
|
+
perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
|
167
|
+
if (!NIL_P(perl_classes)) {
|
168
|
+
re2_options->set_perl_classes(RTEST(perl_classes));
|
169
|
+
}
|
170
|
+
|
171
|
+
word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
|
172
|
+
if (!NIL_P(word_boundary)) {
|
173
|
+
re2_options->set_word_boundary(RTEST(word_boundary));
|
174
|
+
}
|
175
|
+
|
176
|
+
one_line = rb_hash_aref(options, ID2SYM(id_one_line));
|
177
|
+
if (!NIL_P(one_line)) {
|
178
|
+
re2_options->set_one_line(RTEST(one_line));
|
179
|
+
}
|
180
|
+
|
181
|
+
p->pattern = new RE2(StringValuePtr(pattern), *re2_options);
|
182
|
+
} else {
|
183
|
+
p->pattern = new RE2(StringValuePtr(pattern));
|
184
|
+
}
|
185
|
+
|
186
|
+
return self;
|
187
|
+
}
|
188
|
+
|
189
|
+
/*
|
190
|
+
* call-seq:
|
191
|
+
* re2.inspect -> string
|
192
|
+
*
|
193
|
+
* Returns a printable version of the regular expression +re2+,
|
194
|
+
* surrounded by forward slashes.
|
195
|
+
*
|
196
|
+
* re2 = RE2.new("woo?")
|
197
|
+
* re2.inspect #=> "/woo?/"
|
198
|
+
*/
|
199
|
+
static VALUE
|
200
|
+
re2_inspect(VALUE self)
|
201
|
+
{
|
202
|
+
VALUE result = rb_str_buf_new(0);
|
203
|
+
re2_pattern *p;
|
204
|
+
|
205
|
+
rb_str_buf_cat2(result, "/");
|
206
|
+
Data_Get_Struct(self, re2_pattern, p);
|
207
|
+
rb_str_buf_cat2(result, p->pattern->pattern().c_str());
|
208
|
+
rb_str_buf_cat2(result, "/");
|
209
|
+
|
210
|
+
return result;
|
211
|
+
}
|
212
|
+
|
213
|
+
/*
|
214
|
+
* call-seq:
|
215
|
+
* re2.to_s -> string
|
216
|
+
* re2.to_str -> string
|
217
|
+
* re2.pattern -> string
|
218
|
+
* re2.source -> string
|
219
|
+
* re2.inspect -> string
|
220
|
+
*
|
221
|
+
* Returns a string version of the regular expression +re2+.
|
222
|
+
*
|
223
|
+
* re2 = RE2.new("woo?")
|
224
|
+
* re2.to_s #=> "woo?"
|
225
|
+
*/
|
226
|
+
static VALUE
|
227
|
+
re2_to_s(VALUE self)
|
228
|
+
{
|
229
|
+
re2_pattern *p;
|
230
|
+
Data_Get_Struct(self, re2_pattern, p);
|
231
|
+
return rb_str_new2(p->pattern->pattern().c_str());
|
232
|
+
}
|
233
|
+
|
234
|
+
/*
|
235
|
+
* call-seq:
|
236
|
+
* re2.ok? -> true or false
|
237
|
+
*
|
238
|
+
* Returns whether or not the regular expression +re2+
|
239
|
+
* was compiled successfully or not.
|
240
|
+
*
|
241
|
+
* re2 = RE2.new("woo?")
|
242
|
+
* re2.ok? #=> true
|
243
|
+
*/
|
244
|
+
static VALUE
|
245
|
+
re2_ok(VALUE self)
|
246
|
+
{
|
247
|
+
re2_pattern *p;
|
248
|
+
Data_Get_Struct(self, re2_pattern, p);
|
249
|
+
return BOOL2RUBY(p->pattern->ok());
|
250
|
+
}
|
251
|
+
|
252
|
+
/*
|
253
|
+
* call-seq:
|
254
|
+
* re2.utf8? -> true or false
|
255
|
+
*
|
256
|
+
* Returns whether or not the regular expression +re2+
|
257
|
+
* was compiled with the utf8 option set to true.
|
258
|
+
*
|
259
|
+
* re2 = RE2.new("woo?", :utf8 => true)
|
260
|
+
* re2.utf8? #=> true
|
261
|
+
*/
|
262
|
+
static VALUE
|
263
|
+
re2_utf8(VALUE self)
|
264
|
+
{
|
265
|
+
re2_pattern *p;
|
266
|
+
Data_Get_Struct(self, re2_pattern, p);
|
267
|
+
return BOOL2RUBY(p->pattern->options().utf8());
|
268
|
+
}
|
269
|
+
|
270
|
+
/*
|
271
|
+
* call-seq:
|
272
|
+
* re2.posix_syntax? -> true or false
|
273
|
+
*
|
274
|
+
* Returns whether or not the regular expression +re2+
|
275
|
+
* was compiled with the posix_syntax option set to true.
|
276
|
+
*
|
277
|
+
* re2 = RE2.new("woo?", :posix_syntax => true)
|
278
|
+
* re2.posix_syntax? #=> true
|
279
|
+
*/
|
280
|
+
static VALUE
|
281
|
+
re2_posix_syntax(VALUE self)
|
282
|
+
{
|
283
|
+
re2_pattern *p;
|
284
|
+
Data_Get_Struct(self, re2_pattern, p);
|
285
|
+
return BOOL2RUBY(p->pattern->options().posix_syntax());
|
286
|
+
}
|
287
|
+
|
288
|
+
/*
|
289
|
+
* call-seq:
|
290
|
+
* re2.longest_match? -> true or false
|
291
|
+
*
|
292
|
+
* Returns whether or not the regular expression +re2+
|
293
|
+
* was compiled with the longest_match option set to true.
|
294
|
+
*
|
295
|
+
* re2 = RE2.new("woo?", :longest_match => true)
|
296
|
+
* re2.longest_match? #=> true
|
297
|
+
*/
|
298
|
+
static VALUE
|
299
|
+
re2_longest_match(VALUE self)
|
300
|
+
{
|
301
|
+
re2_pattern *p;
|
302
|
+
Data_Get_Struct(self, re2_pattern, p);
|
303
|
+
return BOOL2RUBY(p->pattern->options().longest_match());
|
304
|
+
}
|
305
|
+
|
306
|
+
/*
|
307
|
+
* call-seq:
|
308
|
+
* re2.log_errors? -> true or false
|
309
|
+
*
|
310
|
+
* Returns whether or not the regular expression +re2+
|
311
|
+
* was compiled with the log_errors option set to true.
|
312
|
+
*
|
313
|
+
* re2 = RE2.new("woo?", :log_errors => true)
|
314
|
+
* re2.log_errors? #=> true
|
315
|
+
*/
|
316
|
+
static VALUE
|
317
|
+
re2_log_errors(VALUE self)
|
318
|
+
{
|
319
|
+
re2_pattern *p;
|
320
|
+
Data_Get_Struct(self, re2_pattern, p);
|
321
|
+
return BOOL2RUBY(p->pattern->options().log_errors());
|
322
|
+
}
|
323
|
+
|
324
|
+
/*
|
325
|
+
* call-seq:
|
326
|
+
* re2.max_mem -> int
|
327
|
+
*
|
328
|
+
* Returns the max_mem setting for the regular expression
|
329
|
+
* +re2+.
|
330
|
+
*
|
331
|
+
* re2 = RE2.new("woo?", :max_mem => 1024)
|
332
|
+
* re2.max_mem #=> 1024
|
333
|
+
*/
|
334
|
+
static VALUE
|
335
|
+
re2_max_mem(VALUE self)
|
336
|
+
{
|
337
|
+
re2_pattern *p;
|
338
|
+
Data_Get_Struct(self, re2_pattern, p);
|
339
|
+
return INT2FIX(p->pattern->options().max_mem());
|
340
|
+
}
|
341
|
+
|
342
|
+
/*
|
343
|
+
* call-seq:
|
344
|
+
* re2.literal? -> true or false
|
345
|
+
*
|
346
|
+
* Returns whether or not the regular expression +re2+
|
347
|
+
* was compiled with the literal option set to true.
|
348
|
+
*
|
349
|
+
* re2 = RE2.new("woo?", :literal => true)
|
350
|
+
* re2.literal? #=> true
|
351
|
+
*/
|
352
|
+
static VALUE
|
353
|
+
re2_literal(VALUE self)
|
354
|
+
{
|
355
|
+
re2_pattern *p;
|
356
|
+
Data_Get_Struct(self, re2_pattern, p);
|
357
|
+
return BOOL2RUBY(p->pattern->options().literal());
|
358
|
+
}
|
359
|
+
|
360
|
+
/*
|
361
|
+
* call-seq:
|
362
|
+
* re2.never_nl? -> true or false
|
363
|
+
*
|
364
|
+
* Returns whether or not the regular expression +re2+
|
365
|
+
* was compiled with the never_nl option set to true.
|
366
|
+
*
|
367
|
+
* re2 = RE2.new("woo?", :never_nl => true)
|
368
|
+
* re2.never_nl? #=> true
|
369
|
+
*/
|
370
|
+
static VALUE
|
371
|
+
re2_never_nl(VALUE self)
|
372
|
+
{
|
373
|
+
re2_pattern *p;
|
374
|
+
Data_Get_Struct(self, re2_pattern, p);
|
375
|
+
return BOOL2RUBY(p->pattern->options().never_nl());
|
376
|
+
}
|
377
|
+
|
378
|
+
/*
|
379
|
+
* call-seq:
|
380
|
+
* re2.case_sensitive? -> true or false
|
381
|
+
*
|
382
|
+
* Returns whether or not the regular expression +re2+
|
383
|
+
* was compiled with the case_sensitive option set to true.
|
384
|
+
*
|
385
|
+
* re2 = RE2.new("woo?", :case_sensitive => true)
|
386
|
+
* re2.case_sensitive? #=> true
|
387
|
+
*/
|
388
|
+
static VALUE
|
389
|
+
re2_case_sensitive(VALUE self)
|
390
|
+
{
|
391
|
+
re2_pattern *p;
|
392
|
+
Data_Get_Struct(self, re2_pattern, p);
|
393
|
+
return BOOL2RUBY(p->pattern->options().case_sensitive());
|
394
|
+
}
|
395
|
+
|
396
|
+
/*
|
397
|
+
* call-seq:
|
398
|
+
* re2.case_insensitive? -> true or false
|
399
|
+
* re2.casefold? -> true or false
|
400
|
+
*
|
401
|
+
* Returns whether or not the regular expression +re2+
|
402
|
+
* was compiled with the case_sensitive option set to false.
|
403
|
+
*
|
404
|
+
* re2 = RE2.new("woo?", :case_sensitive => true)
|
405
|
+
* re2.case_insensitive? #=> false
|
406
|
+
*/
|
407
|
+
static VALUE
|
408
|
+
re2_case_insensitive(VALUE self)
|
409
|
+
{
|
410
|
+
return BOOL2RUBY(re2_case_sensitive(self) != Qtrue);
|
411
|
+
}
|
412
|
+
|
413
|
+
/*
|
414
|
+
* call-seq:
|
415
|
+
* re2.perl_classes? -> true or false
|
416
|
+
*
|
417
|
+
* Returns whether or not the regular expression +re2+
|
418
|
+
* was compiled with the perl_classes option set to true.
|
419
|
+
*
|
420
|
+
* re2 = RE2.new("woo?", :perl_classes => true)
|
421
|
+
* re2.perl_classes? #=> true
|
422
|
+
*/
|
423
|
+
static VALUE
|
424
|
+
re2_perl_classes(VALUE self)
|
425
|
+
{
|
426
|
+
re2_pattern *p;
|
427
|
+
Data_Get_Struct(self, re2_pattern, p);
|
428
|
+
return BOOL2RUBY(p->pattern->options().perl_classes());
|
429
|
+
}
|
430
|
+
|
431
|
+
/*
|
432
|
+
* call-seq:
|
433
|
+
* re2.word_boundary? -> true or false
|
434
|
+
*
|
435
|
+
* Returns whether or not the regular expression +re2+
|
436
|
+
* was compiled with the word_boundary option set to true.
|
437
|
+
*
|
438
|
+
* re2 = RE2.new("woo?", :word_boundary => true)
|
439
|
+
* re2.word_boundary? #=> true
|
440
|
+
*/
|
441
|
+
static VALUE
|
442
|
+
re2_word_boundary(VALUE self)
|
443
|
+
{
|
444
|
+
re2_pattern *p;
|
445
|
+
Data_Get_Struct(self, re2_pattern, p);
|
446
|
+
return BOOL2RUBY(p->pattern->options().word_boundary());
|
447
|
+
}
|
448
|
+
|
449
|
+
/*
|
450
|
+
* call-seq:
|
451
|
+
* re2.one_line? -> true or false
|
452
|
+
*
|
453
|
+
* Returns whether or not the regular expression +re2+
|
454
|
+
* was compiled with the one_line option set to true.
|
455
|
+
*
|
456
|
+
* re2 = RE2.new("woo?", :one_line => true)
|
457
|
+
* re2.one_line? #=> true
|
458
|
+
*/
|
459
|
+
static VALUE
|
460
|
+
re2_one_line(VALUE self)
|
461
|
+
{
|
462
|
+
re2_pattern *p;
|
463
|
+
Data_Get_Struct(self, re2_pattern, p);
|
464
|
+
return BOOL2RUBY(p->pattern->options().one_line());
|
465
|
+
}
|
466
|
+
|
467
|
+
/*
|
468
|
+
* call-seq:
|
469
|
+
* re2.error -> error_str
|
470
|
+
*
|
471
|
+
* If the RE2 could not be created properly, returns an
|
472
|
+
* error string.
|
473
|
+
*/
|
474
|
+
static VALUE
|
475
|
+
re2_error(VALUE self)
|
476
|
+
{
|
477
|
+
re2_pattern *p;
|
478
|
+
Data_Get_Struct(self, re2_pattern, p);
|
479
|
+
return rb_str_new2(p->pattern->error().c_str());
|
480
|
+
}
|
481
|
+
|
482
|
+
/*
|
483
|
+
* call-seq:
|
484
|
+
* re2.error_arg -> error_str
|
485
|
+
*
|
486
|
+
* If the RE2 could not be created properly, returns
|
487
|
+
* the offending portion of the regexp.
|
488
|
+
*/
|
489
|
+
static VALUE
|
490
|
+
re2_error_arg(VALUE self)
|
491
|
+
{
|
492
|
+
re2_pattern *p;
|
493
|
+
Data_Get_Struct(self, re2_pattern, p);
|
494
|
+
return rb_str_new2(p->pattern->error_arg().c_str());
|
495
|
+
}
|
496
|
+
|
497
|
+
/*
|
498
|
+
* call-seq:
|
499
|
+
* re2.program_size -> size
|
500
|
+
*
|
501
|
+
* Returns the program size, a very approximate measure
|
502
|
+
* of a regexp's "cost". Larger numbers are more expensive
|
503
|
+
* than smaller numbers.
|
504
|
+
*/
|
505
|
+
static VALUE
|
506
|
+
re2_program_size(VALUE self)
|
507
|
+
{
|
508
|
+
re2_pattern *p;
|
509
|
+
Data_Get_Struct(self, re2_pattern, p);
|
510
|
+
return INT2FIX(p->pattern->ProgramSize());
|
511
|
+
}
|
512
|
+
|
513
|
+
/*
|
514
|
+
* call-seq:
|
515
|
+
* re2.options -> options_hash
|
516
|
+
*
|
517
|
+
* Returns a hash of the options currently set for
|
518
|
+
* +re2+.
|
519
|
+
*/
|
520
|
+
static VALUE
|
521
|
+
re2_options(VALUE self)
|
522
|
+
{
|
523
|
+
VALUE options;
|
524
|
+
re2_pattern *p;
|
525
|
+
|
526
|
+
Data_Get_Struct(self, re2_pattern, p);
|
527
|
+
options = rb_hash_new();
|
528
|
+
|
529
|
+
rb_hash_aset(options, ID2SYM(id_utf8),
|
530
|
+
BOOL2RUBY(p->pattern->options().utf8()));
|
531
|
+
|
532
|
+
rb_hash_aset(options, ID2SYM(id_posix_syntax),
|
533
|
+
BOOL2RUBY(p->pattern->options().posix_syntax()));
|
534
|
+
|
535
|
+
rb_hash_aset(options, ID2SYM(id_longest_match),
|
536
|
+
BOOL2RUBY(p->pattern->options().longest_match()));
|
537
|
+
|
538
|
+
rb_hash_aset(options, ID2SYM(id_log_errors),
|
539
|
+
BOOL2RUBY(p->pattern->options().log_errors()));
|
540
|
+
|
541
|
+
rb_hash_aset(options, ID2SYM(id_max_mem),
|
542
|
+
INT2FIX(p->pattern->options().max_mem()));
|
543
|
+
|
544
|
+
rb_hash_aset(options, ID2SYM(id_literal),
|
545
|
+
BOOL2RUBY(p->pattern->options().literal()));
|
546
|
+
|
547
|
+
rb_hash_aset(options, ID2SYM(id_never_nl),
|
548
|
+
BOOL2RUBY(p->pattern->options().never_nl()));
|
549
|
+
|
550
|
+
rb_hash_aset(options, ID2SYM(id_case_sensitive),
|
551
|
+
BOOL2RUBY(p->pattern->options().case_sensitive()));
|
552
|
+
|
553
|
+
rb_hash_aset(options, ID2SYM(id_perl_classes),
|
554
|
+
BOOL2RUBY(p->pattern->options().perl_classes()));
|
555
|
+
|
556
|
+
rb_hash_aset(options, ID2SYM(id_word_boundary),
|
557
|
+
BOOL2RUBY(p->pattern->options().word_boundary()));
|
558
|
+
|
559
|
+
rb_hash_aset(options, ID2SYM(id_one_line),
|
560
|
+
BOOL2RUBY(p->pattern->options().one_line()));
|
561
|
+
|
562
|
+
// This is a read-only hash after all...
|
563
|
+
OBJ_FREEZE(options);
|
564
|
+
|
565
|
+
return options;
|
566
|
+
}
|
567
|
+
|
568
|
+
/*
|
569
|
+
* call-seq:
|
570
|
+
* re2.number_of_capturing_groups -> int
|
571
|
+
*
|
572
|
+
* Returns the number of capturing subpatterns, or -1 if the regexp
|
573
|
+
* wasn't valid on construction. The overall match ($0) does not
|
574
|
+
* count: if the regexp is "(a)(b)", returns 2.
|
575
|
+
*/
|
576
|
+
static VALUE
|
577
|
+
re2_number_of_capturing_groups(VALUE self)
|
578
|
+
{
|
579
|
+
re2_pattern *p;
|
580
|
+
|
581
|
+
Data_Get_Struct(self, re2_pattern, p);
|
582
|
+
return INT2FIX(p->pattern->NumberOfCapturingGroups());
|
583
|
+
}
|
584
|
+
|
585
|
+
/*
|
586
|
+
* call-seq:
|
587
|
+
* re2.match(text) -> [match, match]
|
588
|
+
* re2.match(text, 0) -> true or false
|
589
|
+
* re2.match(text, num_of_matches) -> [match, match]
|
590
|
+
*
|
591
|
+
* Looks for the pattern in +re2+ in +text+; when specified
|
592
|
+
* without a second argument, will return an array of the matching
|
593
|
+
* pattern and all subpatterns. If the second argument is 0, a
|
594
|
+
* simple true or false will be returned to indicate a successful
|
595
|
+
* match. If the second argument is any integer greater than 0,
|
596
|
+
* that number of matches will be returned (padded with nils if
|
597
|
+
* there are insufficient matches).
|
598
|
+
*
|
599
|
+
* r = RE2.new('w(o)(o)')
|
600
|
+
* r.match('woo') #=> ["woo", "o", "o"]
|
601
|
+
* r.match('woo', 0) #=> true
|
602
|
+
* r.match('bob', 0) #=> false
|
603
|
+
* r.match('woo', 1) #=> ["woo", "o"]
|
604
|
+
*/
|
605
|
+
static VALUE
|
606
|
+
re2_match(int argc, VALUE *argv, VALUE self)
|
607
|
+
{
|
608
|
+
int n;
|
609
|
+
bool matched;
|
610
|
+
re2_pattern *p;
|
611
|
+
VALUE text, number_of_matches, matches;
|
612
|
+
re2::StringPiece *string_matches, *text_as_string_piece;
|
613
|
+
|
614
|
+
rb_scan_args(argc, argv, "11", &text, &number_of_matches);
|
615
|
+
|
616
|
+
Data_Get_Struct(self, re2_pattern, p);
|
617
|
+
|
618
|
+
if (RTEST(number_of_matches)) {
|
619
|
+
n = NUM2INT(number_of_matches);
|
620
|
+
} else {
|
621
|
+
n = p->pattern->NumberOfCapturingGroups();
|
622
|
+
}
|
623
|
+
|
624
|
+
text_as_string_piece = new re2::StringPiece(StringValuePtr(text));
|
625
|
+
|
626
|
+
if (n == 0) {
|
627
|
+
return BOOL2RUBY(p->pattern->Match(*text_as_string_piece, 0, RE2::UNANCHORED, 0, 0));
|
628
|
+
} else {
|
629
|
+
|
630
|
+
/* Because match returns the whole match as well. */
|
631
|
+
n += 1;
|
632
|
+
|
633
|
+
string_matches = new re2::StringPiece[n];
|
634
|
+
|
635
|
+
matched = p->pattern->Match(*text_as_string_piece, 0, RE2::UNANCHORED, string_matches, n);
|
636
|
+
|
637
|
+
if (matched) {
|
638
|
+
matches = rb_ary_new();
|
639
|
+
|
640
|
+
for (int i = 0; i < n; i++) {
|
641
|
+
if (!string_matches[i].empty()) {
|
642
|
+
rb_ary_push(matches, rb_str_new2(string_matches[i].as_string().c_str()));
|
643
|
+
} else {
|
644
|
+
rb_ary_push(matches, Qnil);
|
645
|
+
}
|
646
|
+
}
|
647
|
+
|
648
|
+
return matches;
|
649
|
+
} else {
|
650
|
+
return Qnil;
|
651
|
+
}
|
652
|
+
}
|
653
|
+
}
|
654
|
+
|
655
|
+
/*
|
656
|
+
* call-seq:
|
657
|
+
* re2.match?(text) -> true or false
|
658
|
+
* re2 =~ text -> true or false
|
659
|
+
*
|
660
|
+
* Returns true or false to indicate a successful match.
|
661
|
+
* Equivalent to +re2.match(text, 0)+.
|
662
|
+
*/
|
663
|
+
static VALUE
|
664
|
+
re2_match_query(VALUE self, VALUE text)
|
665
|
+
{
|
666
|
+
VALUE argv[2];
|
667
|
+
argv[0] = text;
|
668
|
+
argv[1] = INT2FIX(0);
|
669
|
+
|
670
|
+
return re2_match(2, argv, self);
|
671
|
+
}
|
672
|
+
|
673
|
+
/*
|
674
|
+
* call-seq:
|
675
|
+
* re2 !~ text -> true or false
|
676
|
+
*
|
677
|
+
* Returns true or false to indicate an unsuccessful match.
|
678
|
+
* Equivalent to +!re2.match(text, 0)+.
|
679
|
+
*/
|
680
|
+
static VALUE
|
681
|
+
re2_bang_tilde(VALUE self, VALUE text)
|
682
|
+
{
|
683
|
+
return BOOL2RUBY(re2_match_query(self, text) != Qtrue);
|
684
|
+
}
|
685
|
+
|
686
|
+
/*
|
687
|
+
* call-seq:
|
688
|
+
* RE2::FullMatch(text, re) -> true or false
|
689
|
+
*
|
690
|
+
* Returns whether or not a full match for +re2+ was
|
691
|
+
* found in text.
|
692
|
+
*
|
693
|
+
* RE2::FullMatch("woo", "wo+") #=> true
|
694
|
+
* RE2::FullMatch("woo", "a") #=> false
|
695
|
+
* re2 = RE2.new("woo")
|
696
|
+
* RE2::FullMatch("woo", re2) #=> true
|
697
|
+
*/
|
698
|
+
static VALUE
|
699
|
+
re2_FullMatch(VALUE self, VALUE text, VALUE re)
|
700
|
+
{
|
701
|
+
UNUSED(self);
|
702
|
+
bool result;
|
703
|
+
re2_pattern *p;
|
704
|
+
|
705
|
+
if (rb_obj_is_kind_of(re, re2_cRE2)) {
|
706
|
+
Data_Get_Struct(re, re2_pattern, p);
|
707
|
+
result = RE2::FullMatch(StringValuePtr(text), *p->pattern);
|
708
|
+
} else {
|
709
|
+
result = RE2::FullMatch(StringValuePtr(text), StringValuePtr(re));
|
710
|
+
}
|
711
|
+
|
712
|
+
return BOOL2RUBY(result);
|
713
|
+
}
|
714
|
+
|
715
|
+
/*
|
716
|
+
* call-seq:
|
717
|
+
* RE2::FullMatchN(text, re) -> array of matches
|
718
|
+
*
|
719
|
+
* Returns an array of successful matches as defined in
|
720
|
+
* +re+ for +text+.
|
721
|
+
*
|
722
|
+
* RE2::FullMatchN("woo", "w(oo)") #=> ["oo"]
|
723
|
+
*/
|
724
|
+
static VALUE
|
725
|
+
re2_FullMatchN(VALUE self, VALUE text, VALUE re)
|
726
|
+
{
|
727
|
+
UNUSED(self);
|
728
|
+
int n;
|
729
|
+
bool matched;
|
730
|
+
re2_pattern *p;
|
731
|
+
VALUE matches;
|
732
|
+
RE2 *compiled_pattern;
|
733
|
+
RE2::Arg *argv;
|
734
|
+
const RE2::Arg **args;
|
735
|
+
std::string *string_matches;
|
736
|
+
|
737
|
+
if (rb_obj_is_kind_of(re, re2_cRE2)) {
|
738
|
+
Data_Get_Struct(re, re2_pattern, p);
|
739
|
+
compiled_pattern = p->pattern;
|
740
|
+
} else {
|
741
|
+
compiled_pattern = new RE2(StringValuePtr(re));
|
742
|
+
}
|
743
|
+
|
744
|
+
n = compiled_pattern->NumberOfCapturingGroups();
|
745
|
+
|
746
|
+
argv = new RE2::Arg[n];
|
747
|
+
args = new const RE2::Arg*[n];
|
748
|
+
string_matches = new std::string[n];
|
749
|
+
|
750
|
+
for (int i = 0; i < n; i++) {
|
751
|
+
args[i] = &argv[i];
|
752
|
+
argv[i] = &string_matches[i];
|
753
|
+
}
|
754
|
+
|
755
|
+
matched = RE2::FullMatchN(StringValuePtr(text), *compiled_pattern, args, n);
|
756
|
+
|
757
|
+
if (matched) {
|
758
|
+
matches = rb_ary_new();
|
759
|
+
|
760
|
+
for (int i = 0; i < n; i++) {
|
761
|
+
if (!string_matches[i].empty()) {
|
762
|
+
rb_ary_push(matches, rb_str_new2(string_matches[i].c_str()));
|
763
|
+
} else {
|
764
|
+
rb_ary_push(matches, Qnil);
|
765
|
+
}
|
766
|
+
}
|
767
|
+
|
768
|
+
return matches;
|
769
|
+
} else {
|
770
|
+
return Qnil;
|
771
|
+
}
|
772
|
+
}
|
773
|
+
|
774
|
+
/*
|
775
|
+
* call-seq:
|
776
|
+
* RE2::PartialMatchN(text, re) -> array of matches
|
777
|
+
*
|
778
|
+
* Returns an array of successful matches as defined in
|
779
|
+
* +re+ for +text+.
|
780
|
+
*
|
781
|
+
* RE2::PartialMatchN("woo", "w(oo)") #=> ["oo"]
|
782
|
+
*/
|
783
|
+
static VALUE
|
784
|
+
re2_PartialMatchN(VALUE self, VALUE text, VALUE re)
|
785
|
+
{
|
786
|
+
UNUSED(self);
|
787
|
+
int n;
|
788
|
+
bool matched;
|
789
|
+
re2_pattern *p;
|
790
|
+
VALUE matches;
|
791
|
+
RE2 *compiled_pattern;
|
792
|
+
RE2::Arg *argv;
|
793
|
+
const RE2::Arg **args;
|
794
|
+
std::string *string_matches;
|
795
|
+
|
796
|
+
if (rb_obj_is_kind_of(re, re2_cRE2)) {
|
797
|
+
Data_Get_Struct(re, re2_pattern, p);
|
798
|
+
compiled_pattern = p->pattern;
|
799
|
+
} else {
|
800
|
+
compiled_pattern = new RE2(StringValuePtr(re));
|
801
|
+
}
|
802
|
+
|
803
|
+
n = compiled_pattern->NumberOfCapturingGroups();
|
804
|
+
|
805
|
+
argv = new RE2::Arg[n];
|
806
|
+
args = new const RE2::Arg*[n];
|
807
|
+
string_matches = new std::string[n];
|
808
|
+
|
809
|
+
for (int i = 0; i < n; i++) {
|
810
|
+
args[i] = &argv[i];
|
811
|
+
argv[i] = &string_matches[i];
|
812
|
+
}
|
813
|
+
|
814
|
+
matched = RE2::PartialMatchN(StringValuePtr(text), *compiled_pattern, args, n);
|
815
|
+
|
816
|
+
if (matched) {
|
817
|
+
matches = rb_ary_new();
|
818
|
+
|
819
|
+
for (int i = 0; i < n; i++) {
|
820
|
+
if (!string_matches[i].empty()) {
|
821
|
+
rb_ary_push(matches, rb_str_new2(string_matches[i].c_str()));
|
822
|
+
} else {
|
823
|
+
rb_ary_push(matches, Qnil);
|
824
|
+
}
|
825
|
+
}
|
826
|
+
|
827
|
+
return matches;
|
828
|
+
} else {
|
829
|
+
return Qnil;
|
830
|
+
}
|
831
|
+
}
|
832
|
+
|
833
|
+
/*
|
834
|
+
* call-seq:
|
835
|
+
* RE2::PartialMatch(text, re) -> true or false
|
836
|
+
*
|
837
|
+
* Returns whether or not a partial match for +re2+ was
|
838
|
+
* found in text.
|
839
|
+
*
|
840
|
+
* RE2::PartialMatch("woo", "o+") #=> true
|
841
|
+
* RE2::PartialMatch("woo", "a") #=> false
|
842
|
+
* re2 = RE2.new("oo?")
|
843
|
+
* RE2::PartialMatch("woo", re2) #=> true
|
844
|
+
*/
|
845
|
+
static VALUE
|
846
|
+
re2_PartialMatch(VALUE self, VALUE text, VALUE re)
|
847
|
+
{
|
848
|
+
UNUSED(self);
|
849
|
+
bool result;
|
850
|
+
re2_pattern *p;
|
851
|
+
|
852
|
+
if (rb_obj_is_kind_of(re, re2_cRE2)) {
|
853
|
+
Data_Get_Struct(re, re2_pattern, p);
|
854
|
+
result = RE2::PartialMatch(StringValuePtr(text), *p->pattern);
|
855
|
+
} else {
|
856
|
+
result = RE2::PartialMatch(StringValuePtr(text), StringValuePtr(re));
|
857
|
+
}
|
858
|
+
|
859
|
+
return BOOL2RUBY(result);
|
860
|
+
}
|
861
|
+
|
862
|
+
/*
|
863
|
+
* call-seq:
|
864
|
+
* RE2::Replace(str, pattern, rewrite) -> str
|
865
|
+
*
|
866
|
+
* Replaces the first occurrence +pattern+ in +str+ with
|
867
|
+
* +rewrite+ <i>in place</i>.
|
868
|
+
*
|
869
|
+
* RE2::Replace("hello there", "hello", "howdy") #=> "howdy there"
|
870
|
+
* re2 = RE2.new("hel+o")
|
871
|
+
* RE2::Replace("hello there", re2, "yo") #=> "yo there"
|
872
|
+
* text = "Good morning"
|
873
|
+
* RE2::Replace(text, "morn", "even") #=> "Good evening"
|
874
|
+
* text #=> "Good evening"
|
875
|
+
*/
|
876
|
+
static VALUE
|
877
|
+
re2_Replace(VALUE self, VALUE str, VALUE pattern, VALUE rewrite)
|
878
|
+
{
|
879
|
+
UNUSED(self);
|
880
|
+
VALUE repl;
|
881
|
+
re2_pattern *p;
|
882
|
+
|
883
|
+
// Convert all the inputs to be pumped into RE2::Replace.
|
884
|
+
std::string str_as_string(StringValuePtr(str));
|
885
|
+
re2::StringPiece rewrite_as_string_piece(StringValuePtr(rewrite));
|
886
|
+
|
887
|
+
// Do the replacement.
|
888
|
+
if (rb_obj_is_kind_of(pattern, re2_cRE2)) {
|
889
|
+
Data_Get_Struct(pattern, re2_pattern, p);
|
890
|
+
RE2::Replace(&str_as_string, *p->pattern, rewrite_as_string_piece);
|
891
|
+
} else {
|
892
|
+
RE2::Replace(&str_as_string, StringValuePtr(pattern), rewrite_as_string_piece);
|
893
|
+
}
|
894
|
+
|
895
|
+
// Save the replacement as a VALUE.
|
896
|
+
repl = rb_str_new(str_as_string.c_str(), str_as_string.length());
|
897
|
+
|
898
|
+
// Replace the original string with the replacement.
|
899
|
+
rb_str_update(str, 0, RSTRING_LEN(str), repl);
|
900
|
+
|
901
|
+
return str;
|
902
|
+
}
|
903
|
+
|
904
|
+
/*
|
905
|
+
* call-seq:
|
906
|
+
* RE2::GlobalReplace(str, pattern, rewrite) -> str
|
907
|
+
*
|
908
|
+
* Replaces every occurrence of +pattern+ in +str+ with
|
909
|
+
* +rewrite+ <i>in place</i>.
|
910
|
+
*
|
911
|
+
* RE2::GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
|
912
|
+
* re2 = RE2.new("oo?")
|
913
|
+
* RE2::GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
|
914
|
+
* text = "Good morning"
|
915
|
+
* RE2::GlobalReplace(text, "o", "ee") #=> "Geeeed meerning"
|
916
|
+
* text #=> "Geeeed meerning"
|
917
|
+
*/
|
918
|
+
static VALUE
|
919
|
+
re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern, VALUE rewrite)
|
920
|
+
{
|
921
|
+
UNUSED(self);
|
922
|
+
|
923
|
+
// Convert all the inputs to be pumped into RE2::GlobalReplace.
|
924
|
+
re2_pattern *p;
|
925
|
+
std::string str_as_string(StringValuePtr(str));
|
926
|
+
re2::StringPiece rewrite_as_string_piece(StringValuePtr(rewrite));
|
927
|
+
VALUE repl;
|
928
|
+
|
929
|
+
// Do the replacement.
|
930
|
+
if (rb_obj_is_kind_of(pattern, re2_cRE2)) {
|
931
|
+
Data_Get_Struct(pattern, re2_pattern, p);
|
932
|
+
RE2::GlobalReplace(&str_as_string, *p->pattern, rewrite_as_string_piece);
|
933
|
+
} else {
|
934
|
+
RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern), rewrite_as_string_piece);
|
935
|
+
}
|
936
|
+
|
937
|
+
// Save the replacement as a VALUE.
|
938
|
+
repl = rb_str_new(str_as_string.c_str(), str_as_string.length());
|
939
|
+
|
940
|
+
// Replace the original string with the replacement.
|
941
|
+
rb_str_update(str, 0, RSTRING_LEN(str), repl);
|
942
|
+
|
943
|
+
return str;
|
944
|
+
}
|
945
|
+
|
946
|
+
/*
|
947
|
+
* call-seq:
|
948
|
+
* RE2::QuoteMeta(str) -> str
|
949
|
+
* RE2.escape(str) -> str
|
950
|
+
* RE2.quote(str) -> str
|
951
|
+
*
|
952
|
+
* Returns a version of str with all potentially meaningful regexp
|
953
|
+
* characters escaped. The returned string, used as a regular
|
954
|
+
* expression, will exactly match the original string.
|
955
|
+
*
|
956
|
+
* RE2::QuoteMeta("1.5-2.0?") #=> "1\.5\-2\.0\?"
|
957
|
+
*/
|
958
|
+
static VALUE
|
959
|
+
re2_QuoteMeta(VALUE self, VALUE unquoted)
|
960
|
+
{
|
961
|
+
UNUSED(self);
|
962
|
+
re2::StringPiece unquoted_as_string_piece(StringValuePtr(unquoted));
|
963
|
+
return rb_str_new2(RE2::QuoteMeta(unquoted_as_string_piece).c_str());
|
964
|
+
}
|
965
|
+
|
966
|
+
void
|
967
|
+
Init_re2()
|
968
|
+
{
|
969
|
+
re2_cRE2 = rb_define_class("RE2", rb_cObject);
|
970
|
+
rb_define_alloc_func(re2_cRE2, (VALUE (*)(VALUE))re2_allocate);
|
971
|
+
rb_define_method(re2_cRE2, "initialize", (VALUE (*)(...))re2_initialize, -1);
|
972
|
+
rb_define_method(re2_cRE2, "ok?", (VALUE (*)(...))re2_ok, 0);
|
973
|
+
rb_define_method(re2_cRE2, "error", (VALUE (*)(...))re2_error, 0);
|
974
|
+
rb_define_method(re2_cRE2, "error_arg", (VALUE (*)(...))re2_error_arg, 0);
|
975
|
+
rb_define_method(re2_cRE2, "program_size", (VALUE (*)(...))re2_program_size, 0);
|
976
|
+
rb_define_method(re2_cRE2, "options", (VALUE (*)(...))re2_options, 0);
|
977
|
+
rb_define_method(re2_cRE2, "number_of_capturing_groups", (VALUE (*)(...))re2_number_of_capturing_groups, 0);
|
978
|
+
rb_define_method(re2_cRE2, "match", (VALUE (*)(...))re2_match, -1);
|
979
|
+
rb_define_method(re2_cRE2, "match?", (VALUE (*)(...))re2_match_query, 1);
|
980
|
+
rb_define_method(re2_cRE2, "=~", (VALUE (*)(...))re2_match_query, 1);
|
981
|
+
rb_define_method(re2_cRE2, "===", (VALUE (*)(...))re2_match_query, 1);
|
982
|
+
rb_define_method(re2_cRE2, "!~", (VALUE (*)(...))re2_bang_tilde, 1);
|
983
|
+
rb_define_method(re2_cRE2, "to_s", (VALUE (*)(...))re2_to_s, 0);
|
984
|
+
rb_define_method(re2_cRE2, "to_str", (VALUE (*)(...))re2_to_s, 0);
|
985
|
+
rb_define_method(re2_cRE2, "pattern", (VALUE (*)(...))re2_to_s, 0);
|
986
|
+
rb_define_method(re2_cRE2, "source", (VALUE (*)(...))re2_to_s, 0);
|
987
|
+
rb_define_method(re2_cRE2, "inspect", (VALUE (*)(...))re2_inspect, 0);
|
988
|
+
rb_define_method(re2_cRE2, "utf8?", (VALUE (*)(...))re2_utf8, 0);
|
989
|
+
rb_define_method(re2_cRE2, "posix_syntax?", (VALUE (*)(...))re2_posix_syntax, 0);
|
990
|
+
rb_define_method(re2_cRE2, "longest_match?", (VALUE (*)(...))re2_longest_match, 0);
|
991
|
+
rb_define_method(re2_cRE2, "log_errors?", (VALUE (*)(...))re2_log_errors, 0);
|
992
|
+
rb_define_method(re2_cRE2, "max_mem", (VALUE (*)(...))re2_max_mem, 0);
|
993
|
+
rb_define_method(re2_cRE2, "literal?", (VALUE (*)(...))re2_literal, 0);
|
994
|
+
rb_define_method(re2_cRE2, "never_nl?", (VALUE (*)(...))re2_never_nl, 0);
|
995
|
+
rb_define_method(re2_cRE2, "case_sensitive?", (VALUE (*)(...))re2_case_sensitive, 0);
|
996
|
+
rb_define_method(re2_cRE2, "case_insensitive?", (VALUE (*)(...))re2_case_insensitive, 0);
|
997
|
+
rb_define_method(re2_cRE2, "casefold?", (VALUE (*)(...))re2_case_insensitive, 0);
|
998
|
+
rb_define_method(re2_cRE2, "perl_classes?", (VALUE (*)(...))re2_perl_classes, 0);
|
999
|
+
rb_define_method(re2_cRE2, "word_boundary?", (VALUE (*)(...))re2_word_boundary, 0);
|
1000
|
+
rb_define_method(re2_cRE2, "one_line?", (VALUE (*)(...))re2_one_line, 0);
|
1001
|
+
rb_define_singleton_method(re2_cRE2, "FullMatch", (VALUE (*)(...))re2_FullMatch, 2);
|
1002
|
+
rb_define_singleton_method(re2_cRE2, "FullMatchN", (VALUE (*)(...))re2_FullMatchN, 2);
|
1003
|
+
rb_define_singleton_method(re2_cRE2, "PartialMatch", (VALUE (*)(...))re2_PartialMatch, 2);
|
1004
|
+
rb_define_singleton_method(re2_cRE2, "PartialMatchN", (VALUE (*)(...))re2_PartialMatchN, 2);
|
1005
|
+
rb_define_singleton_method(re2_cRE2, "Replace", (VALUE (*)(...))re2_Replace, 3);
|
1006
|
+
rb_define_singleton_method(re2_cRE2, "GlobalReplace", (VALUE (*)(...))re2_GlobalReplace, 3);
|
1007
|
+
rb_define_singleton_method(re2_cRE2, "QuoteMeta", (VALUE (*)(...))re2_QuoteMeta, 1);
|
1008
|
+
rb_define_singleton_method(re2_cRE2, "escape", (VALUE (*)(...))re2_QuoteMeta, 1);
|
1009
|
+
rb_define_singleton_method(re2_cRE2, "quote", (VALUE (*)(...))re2_QuoteMeta, 1);
|
1010
|
+
rb_define_singleton_method(re2_cRE2, "compile", (VALUE (*)(...))rb_class_new_instance, -1);
|
1011
|
+
rb_define_global_function("RE2", (VALUE (*)(...))re2_re2, -1);
|
1012
|
+
|
1013
|
+
/* Create the symbols used in options. */
|
1014
|
+
id_utf8 = rb_intern("utf8");
|
1015
|
+
id_posix_syntax = rb_intern("posix_syntax");
|
1016
|
+
id_longest_match = rb_intern("longest_match");
|
1017
|
+
id_log_errors = rb_intern("log_errors");
|
1018
|
+
id_max_mem = rb_intern("max_mem");
|
1019
|
+
id_literal = rb_intern("literal");
|
1020
|
+
id_never_nl = rb_intern("never_nl");
|
1021
|
+
id_case_sensitive = rb_intern("case_sensitive");
|
1022
|
+
id_perl_classes = rb_intern("perl_classes");
|
1023
|
+
id_word_boundary = rb_intern("word_boundary");
|
1024
|
+
id_one_line = rb_intern("one_line");
|
1025
|
+
}
|
1026
|
+
}
|