re2 0.5.0 → 0.6.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +1 -1
- data/README.md +71 -19
- data/Rakefile +1 -3
- data/ext/re2/re2.cc +353 -201
- data/lib/re2.rb +7 -0
- data/lib/re2/consumer.rb +15 -0
- data/lib/re2/string.rb +1 -1
- data/spec/re2/consumer_spec.rb +90 -0
- data/spec/re2/match_data_spec.rb +16 -0
- data/spec/re2/regexp_spec.rb +9 -0
- data/spec/re2/string_spec.rb +2 -2
- data/spec/re2_spec.rb +8 -0
- metadata +26 -19
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 65fc1ca01f0f974602264152462be26110297250
|
4
|
+
data.tar.gz: e5fdd3a7c238d629d75d76032b557658f993553b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cbc9be6aef0659c89a13c0481b951467f009702e13cd4c6c426d4851c563ad16888959e2c878c79090e221909cd18f391b2bf3026b3cdaaf9d86317884e11c07
|
7
|
+
data.tar.gz: 94b8ea6f3f3cf1b353fa82c70611e2623c93fea6c189f1bac02435c8bf3a5cacfde032ad95b5a855ea4e1c63f3f94a0b66b1174ad42a269e6e5d6ed4b9793df3
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,34 +1,45 @@
|
|
1
1
|
re2 [Build Status](http://travis-ci.org/mudge/re2)
|
2
2
|
===
|
3
3
|
|
4
|
-
A Ruby binding to [re2][], an "efficient, principled regular expression
|
4
|
+
A Ruby binding to [re2][], an "efficient, principled regular expression
|
5
|
+
library".
|
5
6
|
|
6
7
|
Installation
|
7
8
|
------------
|
8
9
|
|
9
|
-
You will need [re2][] installed as well as a C++ compiler such as [gcc][] (on
|
10
|
+
You will need [re2][] installed as well as a C++ compiler such as [gcc][] (on
|
11
|
+
Debian and Ubuntu, this is provided by the [build-essential][] package). If
|
12
|
+
you are using Mac OS X, I recommend installing re2 with [Homebrew][] by
|
13
|
+
running the following:
|
10
14
|
|
11
|
-
$ brew install
|
15
|
+
$ brew install re2
|
12
16
|
|
13
17
|
If you are using Debian, you can install the [libre2-dev][] package like so:
|
14
18
|
|
15
19
|
$ sudo apt-get install libre2-dev
|
16
20
|
|
17
|
-
If you are using a packaged Ruby distribution, make sure you also have the
|
21
|
+
If you are using a packaged Ruby distribution, make sure you also have the
|
22
|
+
Ruby header files installed such as those provided by the [ruby-dev][] package
|
23
|
+
on Debian and Ubuntu.
|
18
24
|
|
19
|
-
You can then install the library via RubyGems with `gem install re2` or `gem
|
25
|
+
You can then install the library via RubyGems with `gem install re2` or `gem
|
26
|
+
install re2 -- --with-re2-dir=/opt/local/re2` if re2 is not installed in the
|
27
|
+
default location of `/usr/local/`.
|
20
28
|
|
21
29
|
Documentation
|
22
30
|
-------------
|
23
31
|
|
24
|
-
Full documentation automatically generated from the latest version is
|
32
|
+
Full documentation automatically generated from the latest version is
|
33
|
+
available at <http://rubydoc.info/github/mudge/re2>.
|
25
34
|
|
26
|
-
Bear in mind that re2's regular expression syntax differs from PCRE, see the
|
35
|
+
Bear in mind that re2's regular expression syntax differs from PCRE, see the
|
36
|
+
[official syntax page][] for more details.
|
27
37
|
|
28
38
|
Usage
|
29
39
|
-----
|
30
40
|
|
31
|
-
You can use re2 as a mostly drop-in replacement for Ruby's own [Regexp][] and
|
41
|
+
You can use re2 as a mostly drop-in replacement for Ruby's own [Regexp][] and
|
42
|
+
[MatchData][] classes:
|
32
43
|
|
33
44
|
```console
|
34
45
|
$ irb -rubygems
|
@@ -49,14 +60,17 @@ $ irb -rubygems
|
|
49
60
|
=> nil
|
50
61
|
```
|
51
62
|
|
52
|
-
As `RE2::Regexp.new` (or `RE2::Regexp.compile`) can be quite verbose, a helper
|
63
|
+
As `RE2::Regexp.new` (or `RE2::Regexp.compile`) can be quite verbose, a helper
|
64
|
+
method has been defined against `Kernel` so you can use a shorter version to
|
65
|
+
create regular expressions:
|
53
66
|
|
54
67
|
```console
|
55
68
|
> RE2('(\d+)')
|
56
69
|
=> #<RE2::Regexp /(\d+)/>
|
57
70
|
```
|
58
71
|
|
59
|
-
Note the use of *single quotes* as double quotes will interpret `\d` as `d` as
|
72
|
+
Note the use of *single quotes* as double quotes will interpret `\d` as `d` as
|
73
|
+
in the following example:
|
60
74
|
|
61
75
|
```console
|
62
76
|
> RE2("(\d+)")
|
@@ -76,7 +90,8 @@ As of 0.3.0, you can use named groups:
|
|
76
90
|
=> "40"
|
77
91
|
```
|
78
92
|
|
79
|
-
As of 0.4.0, you can mix `RE2::String` into strings to provide helpers from
|
93
|
+
As of 0.4.0, you can mix `RE2::String` into strings to provide helpers from
|
94
|
+
the opposite direction:
|
80
95
|
|
81
96
|
```console
|
82
97
|
> require "re2/string"
|
@@ -102,28 +117,64 @@ class String
|
|
102
117
|
end
|
103
118
|
```
|
104
119
|
|
120
|
+
As of 0.5.0, you can use `RE2::Regexp#consume` to incrementally scan text for
|
121
|
+
matches (similar in purpose to Ruby's
|
122
|
+
[`String#scan`](http://ruby-doc.org/core-2.0.0/String.html#method-i-scan)).
|
123
|
+
Calling `consume` will return an `RE2::Consumer` which is
|
124
|
+
[enumerable](http://ruby-doc.org/core-2.0.0/Enumerable.html) meaning you can
|
125
|
+
use `each` to iterate through the matches (and even use
|
126
|
+
[`Enumerator::Lazy`](http://ruby-doc.org/core-2.0/Enumerator/Lazy.html)):
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
re = RE2('(\w+)')
|
130
|
+
consumer = re.consume("It is a truth universally acknowledged")
|
131
|
+
consumer.each do |match|
|
132
|
+
puts match
|
133
|
+
end
|
134
|
+
|
135
|
+
consumer.rewind
|
136
|
+
|
137
|
+
enum = consumer.to_enum
|
138
|
+
enum.next #=> ["It"]
|
139
|
+
enum.next #=> ["is"]
|
140
|
+
```
|
141
|
+
|
105
142
|
Features
|
106
143
|
--------
|
107
144
|
|
108
|
-
* Pre-compiling regular expressions with
|
145
|
+
* Pre-compiling regular expressions with
|
146
|
+
[`RE2::Regexp.new(re)`](http://code.google.com/p/re2/source/browse/re2/re2.h#96),
|
147
|
+
`RE2::Regexp.compile(re)` or `RE2(re)` (including specifying options, e.g.
|
148
|
+
`RE2::Regexp.new("pattern", :case_sensitive => false)`)
|
109
149
|
|
110
|
-
* Extracting matches with `re2.match(text)` (and an exact number of matches
|
150
|
+
* Extracting matches with `re2.match(text)` (and an exact number of matches
|
151
|
+
with `re2.match(text, number_of_matches)` such as `re2.match("123-234", 2)`)
|
111
152
|
|
112
153
|
* Extracting matches by name (both with strings and symbols)
|
113
154
|
|
114
|
-
* Checking for matches with `re2 =~ text`, `re2 === text` (for use in `case`
|
155
|
+
* Checking for matches with `re2 =~ text`, `re2 === text` (for use in `case`
|
156
|
+
statements) and `re2 !~ text`
|
115
157
|
|
116
|
-
*
|
158
|
+
* Incrementally scanning text with `re2.consume(text)`
|
159
|
+
|
160
|
+
* Checking regular expression compilation with `re2.ok?`, `re2.error` and
|
161
|
+
`re2.error_arg`
|
117
162
|
|
118
163
|
* Checking regular expression "cost" with `re2.program_size`
|
119
164
|
|
120
|
-
* Checking the options for an expression with `re2.options` or individually
|
165
|
+
* Checking the options for an expression with `re2.options` or individually
|
166
|
+
with `re2.case_sensitive?`
|
121
167
|
|
122
|
-
* Performing in-place replacement with [`RE2.Replace(str, pattern,
|
168
|
+
* Performing in-place replacement with [`RE2.Replace(str, pattern,
|
169
|
+
replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#335)
|
123
170
|
|
124
|
-
* Performing in-place global replacement with [`RE2.GlobalReplace(str,
|
171
|
+
* Performing in-place global replacement with [`RE2.GlobalReplace(str,
|
172
|
+
pattern,
|
173
|
+
replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#352)
|
125
174
|
|
126
|
-
* Escaping regular expressions with
|
175
|
+
* Escaping regular expressions with
|
176
|
+
[`RE2::Regexp.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377),
|
177
|
+
`RE2::Regexp.quote(unquoted)` or `RE2.QuoteMeta(unquoted)`
|
127
178
|
|
128
179
|
Contact
|
129
180
|
-------
|
@@ -139,3 +190,4 @@ All feedback should go to the mailing list: <mailto:ruby.re2@librelist.com>
|
|
139
190
|
[Homebrew]: http://mxcl.github.com/homebrew
|
140
191
|
[libre2-dev]: http://packages.debian.org/search?keywords=libre2-dev
|
141
192
|
[official syntax page]: http://code.google.com/p/re2/wiki/Syntax
|
193
|
+
|
data/Rakefile
CHANGED
data/ext/re2/re2.cc
CHANGED
@@ -2,18 +2,35 @@
|
|
2
2
|
* re2 (http://github.com/mudge/re2)
|
3
3
|
* Ruby bindings to re2, an "efficient, principled regular expression library"
|
4
4
|
*
|
5
|
-
* Copyright (c) 2010-
|
5
|
+
* Copyright (c) 2010-2013, Paul Mucur (http://mudge.name)
|
6
6
|
* Released under the BSD Licence, please see LICENSE.txt
|
7
7
|
*/
|
8
8
|
|
9
9
|
#include <re2/re2.h>
|
10
|
+
#include <ruby.h>
|
10
11
|
#include <string>
|
11
12
|
#include <sstream>
|
12
|
-
|
13
|
+
#include <vector>
|
14
|
+
using std::string;
|
15
|
+
using std::ostringstream;
|
16
|
+
using std::nothrow;
|
17
|
+
using std::map;
|
18
|
+
using std::vector;
|
13
19
|
|
14
20
|
extern "C" {
|
15
|
-
|
16
|
-
|
21
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
22
|
+
#include <ruby/encoding.h>
|
23
|
+
#define ENCODED_STR_NEW(str, length, encoding) \
|
24
|
+
({ \
|
25
|
+
VALUE _string = rb_str_new((const char *)str, (long)length); \
|
26
|
+
int _enc = rb_enc_find_index((int)encoding); \
|
27
|
+
rb_enc_associate_index(_string, _enc); \
|
28
|
+
_string; \
|
29
|
+
})
|
30
|
+
#else
|
31
|
+
#define ENCODED_STR_NEW(str, length, encoding) \
|
32
|
+
rb_str_new((const char *)str, (long)length)
|
33
|
+
#endif
|
17
34
|
|
18
35
|
#define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
|
19
36
|
#define UNUSED(x) ((void)x)
|
@@ -44,41 +61,57 @@ extern "C" {
|
|
44
61
|
VALUE regexp, text;
|
45
62
|
} re2_matchdata;
|
46
63
|
|
47
|
-
|
64
|
+
typedef struct {
|
65
|
+
re2::StringPiece input;
|
66
|
+
int argc;
|
67
|
+
VALUE regexp, text;
|
68
|
+
} re2_consumer;
|
69
|
+
|
70
|
+
VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cConsumer;
|
48
71
|
|
49
72
|
/* Symbols used in RE2 options. */
|
50
73
|
static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
51
74
|
id_max_mem, id_literal, id_never_nl, id_case_sensitive,
|
52
75
|
id_perl_classes, id_word_boundary, id_one_line;
|
53
76
|
|
54
|
-
void re2_matchdata_mark(re2_matchdata* self)
|
55
|
-
{
|
77
|
+
void re2_matchdata_mark(re2_matchdata* self) {
|
56
78
|
rb_gc_mark(self->regexp);
|
57
79
|
rb_gc_mark(self->text);
|
58
80
|
}
|
59
81
|
|
60
|
-
void re2_matchdata_free(re2_matchdata* self)
|
61
|
-
{
|
82
|
+
void re2_matchdata_free(re2_matchdata* self) {
|
62
83
|
if (self->matches) {
|
63
84
|
delete[] self->matches;
|
64
85
|
}
|
65
86
|
free(self);
|
66
87
|
}
|
67
88
|
|
68
|
-
void
|
69
|
-
|
70
|
-
|
89
|
+
void re2_consumer_mark(re2_consumer* self) {
|
90
|
+
rb_gc_mark(self->regexp);
|
91
|
+
rb_gc_mark(self->text);
|
92
|
+
}
|
93
|
+
|
94
|
+
void re2_consumer_free(re2_consumer* self) {
|
95
|
+
free(self);
|
96
|
+
}
|
97
|
+
|
98
|
+
void re2_regexp_free(re2_pattern* self) {
|
71
99
|
if (self->pattern) {
|
72
100
|
delete self->pattern;
|
73
101
|
}
|
74
102
|
free(self);
|
75
103
|
}
|
76
104
|
|
77
|
-
static VALUE
|
78
|
-
re2_matchdata_allocate(VALUE klass)
|
79
|
-
{
|
105
|
+
static VALUE re2_matchdata_allocate(VALUE klass) {
|
80
106
|
re2_matchdata *m;
|
81
|
-
return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
|
107
|
+
return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
|
108
|
+
re2_matchdata_free, m);
|
109
|
+
}
|
110
|
+
|
111
|
+
static VALUE re2_consumer_allocate(VALUE klass) {
|
112
|
+
re2_consumer *c;
|
113
|
+
return Data_Make_Struct(klass, re2_consumer, re2_consumer_mark,
|
114
|
+
re2_consumer_free, c);
|
82
115
|
}
|
83
116
|
|
84
117
|
/*
|
@@ -89,15 +122,95 @@ extern "C" {
|
|
89
122
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
90
123
|
* m.string #=> "bob 123"
|
91
124
|
*/
|
92
|
-
static VALUE
|
93
|
-
re2_matchdata_string(VALUE self)
|
94
|
-
{
|
125
|
+
static VALUE re2_matchdata_string(VALUE self) {
|
95
126
|
re2_matchdata *m;
|
96
127
|
Data_Get_Struct(self, re2_matchdata, m);
|
97
128
|
|
98
129
|
return m->text;
|
99
130
|
}
|
100
131
|
|
132
|
+
/*
|
133
|
+
* Returns the string passed into the consumer.
|
134
|
+
*
|
135
|
+
* @return [String] the original string.
|
136
|
+
* @example
|
137
|
+
* c = RE2::Regexp.new('(\d+)').consume("foo")
|
138
|
+
* c.string #=> "foo"
|
139
|
+
*/
|
140
|
+
static VALUE re2_consumer_string(VALUE self) {
|
141
|
+
re2_consumer *c;
|
142
|
+
Data_Get_Struct(self, re2_consumer, c);
|
143
|
+
|
144
|
+
return c->text;
|
145
|
+
}
|
146
|
+
|
147
|
+
/*
|
148
|
+
* Rewind the consumer to the start of the string.
|
149
|
+
*
|
150
|
+
* @example
|
151
|
+
* c = RE2::Regexp.new('(\d+)').consume("1 2 3")
|
152
|
+
* e = c.to_enum
|
153
|
+
* e.next #=> ["1"]
|
154
|
+
* e.next #=> ["2"]
|
155
|
+
* c.rewind
|
156
|
+
* e.next #=> ["1"]
|
157
|
+
*/
|
158
|
+
static VALUE re2_consumer_rewind(VALUE self) {
|
159
|
+
re2_consumer *c;
|
160
|
+
Data_Get_Struct(self, re2_consumer, c);
|
161
|
+
re2::StringPiece input(RSTRING_PTR(c->text));
|
162
|
+
|
163
|
+
c->input = input;
|
164
|
+
|
165
|
+
return self;
|
166
|
+
}
|
167
|
+
|
168
|
+
/*
|
169
|
+
* Scan the given text incrementally for matches, returning an array of
|
170
|
+
* matches on each subsequent call. Returns nil if no matches are found.
|
171
|
+
*
|
172
|
+
* @return [Array<String>] the matches.
|
173
|
+
* @example
|
174
|
+
* c = RE2::Regexp.new('(\w+)').consume("Foo bar baz")
|
175
|
+
* c.consume #=> ["Foo"]
|
176
|
+
* c.consume #=> ["bar"]
|
177
|
+
*/
|
178
|
+
static VALUE re2_consumer_consume(VALUE self) {
|
179
|
+
int i;
|
180
|
+
re2_pattern *p;
|
181
|
+
re2_consumer *c;
|
182
|
+
VALUE result;
|
183
|
+
|
184
|
+
Data_Get_Struct(self, re2_consumer, c);
|
185
|
+
Data_Get_Struct(c->regexp, re2_pattern, p);
|
186
|
+
|
187
|
+
vector<RE2::Arg> argv(c->argc);
|
188
|
+
vector<RE2::Arg*> args(c->argc);
|
189
|
+
vector<string> matches(c->argc);
|
190
|
+
|
191
|
+
for (i = 0; i < c->argc; i++) {
|
192
|
+
args[i] = &argv[i];
|
193
|
+
argv[i] = &matches[i];
|
194
|
+
}
|
195
|
+
|
196
|
+
if (RE2::FindAndConsumeN(&c->input, *p->pattern, &args[0], c->argc)) {
|
197
|
+
result = rb_ary_new2(c->argc);
|
198
|
+
for (i = 0; i < c->argc; i++) {
|
199
|
+
if (matches[i].empty()) {
|
200
|
+
rb_ary_push(result, Qnil);
|
201
|
+
} else {
|
202
|
+
rb_ary_push(result, ENCODED_STR_NEW(matches[i].data(),
|
203
|
+
matches[i].size(),
|
204
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"));
|
205
|
+
}
|
206
|
+
}
|
207
|
+
} else {
|
208
|
+
result = Qnil;
|
209
|
+
}
|
210
|
+
|
211
|
+
return result;
|
212
|
+
}
|
213
|
+
|
101
214
|
/*
|
102
215
|
* Returns the number of elements in the match array (including nils).
|
103
216
|
*
|
@@ -107,9 +220,7 @@ extern "C" {
|
|
107
220
|
* m.size #=> 2
|
108
221
|
* m.length #=> 2
|
109
222
|
*/
|
110
|
-
static VALUE
|
111
|
-
re2_matchdata_size(VALUE self)
|
112
|
-
{
|
223
|
+
static VALUE re2_matchdata_size(VALUE self) {
|
113
224
|
re2_matchdata *m;
|
114
225
|
Data_Get_Struct(self, re2_matchdata, m);
|
115
226
|
|
@@ -124,17 +235,28 @@ extern "C" {
|
|
124
235
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
125
236
|
* m.regexp #=> #<RE2::Regexp /(\d+)/>
|
126
237
|
*/
|
127
|
-
static VALUE
|
128
|
-
re2_matchdata_regexp(VALUE self)
|
129
|
-
{
|
238
|
+
static VALUE re2_matchdata_regexp(VALUE self) {
|
130
239
|
re2_matchdata *m;
|
131
240
|
Data_Get_Struct(self, re2_matchdata, m);
|
132
241
|
return m->regexp;
|
133
242
|
}
|
134
243
|
|
135
|
-
|
136
|
-
|
137
|
-
|
244
|
+
/*
|
245
|
+
* Returns the {RE2::Regexp} used in the consumer.
|
246
|
+
*
|
247
|
+
* @return [RE2::Regexp] the regexp used in the consumer
|
248
|
+
* @example
|
249
|
+
* c = RE2::Regexp.new('(\d+)').consume("bob 123")
|
250
|
+
* c.regexp #=> #<RE2::Regexp /(\d+)/>
|
251
|
+
*/
|
252
|
+
static VALUE re2_consumer_regexp(VALUE self) {
|
253
|
+
re2_consumer *c;
|
254
|
+
Data_Get_Struct(self, re2_consumer, c);
|
255
|
+
|
256
|
+
return c->regexp;
|
257
|
+
}
|
258
|
+
|
259
|
+
static VALUE re2_regexp_allocate(VALUE klass) {
|
138
260
|
re2_pattern *p;
|
139
261
|
return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
|
140
262
|
}
|
@@ -147,15 +269,15 @@ extern "C" {
|
|
147
269
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
148
270
|
* m.to_a #=> ["123", "123"]
|
149
271
|
*/
|
150
|
-
static VALUE
|
151
|
-
re2_matchdata_to_a(VALUE self)
|
152
|
-
{
|
272
|
+
static VALUE re2_matchdata_to_a(VALUE self) {
|
153
273
|
int i;
|
154
274
|
re2_matchdata *m;
|
275
|
+
re2_pattern *p;
|
155
276
|
re2::StringPiece match;
|
156
277
|
VALUE array;
|
157
278
|
|
158
279
|
Data_Get_Struct(self, re2_matchdata, m);
|
280
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
159
281
|
|
160
282
|
array = rb_ary_new2(m->number_of_matches);
|
161
283
|
for (i = 0; i < m->number_of_matches; i++) {
|
@@ -163,20 +285,21 @@ extern "C" {
|
|
163
285
|
rb_ary_push(array, Qnil);
|
164
286
|
} else {
|
165
287
|
match = m->matches[i];
|
166
|
-
rb_ary_push(array,
|
288
|
+
rb_ary_push(array, ENCODED_STR_NEW(match.data(), match.size(),
|
289
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"));
|
167
290
|
}
|
168
291
|
}
|
169
292
|
|
170
293
|
return array;
|
171
294
|
}
|
172
295
|
|
173
|
-
static VALUE
|
174
|
-
re2_matchdata_nth_match(int nth, VALUE self)
|
175
|
-
{
|
296
|
+
static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
|
176
297
|
re2_matchdata *m;
|
298
|
+
re2_pattern *p;
|
177
299
|
re2::StringPiece match;
|
178
300
|
|
179
301
|
Data_Get_Struct(self, re2_matchdata, m);
|
302
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
180
303
|
|
181
304
|
if (nth < 0 || nth >= m->number_of_matches) {
|
182
305
|
return Qnil;
|
@@ -186,14 +309,13 @@ extern "C" {
|
|
186
309
|
if (match.empty()) {
|
187
310
|
return Qnil;
|
188
311
|
} else {
|
189
|
-
return
|
312
|
+
return ENCODED_STR_NEW(match.data(), match.size(),
|
313
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
|
190
314
|
}
|
191
315
|
}
|
192
316
|
}
|
193
317
|
|
194
|
-
static VALUE
|
195
|
-
re2_matchdata_named_match(const char* name, VALUE self)
|
196
|
-
{
|
318
|
+
static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
|
197
319
|
int idx;
|
198
320
|
re2_matchdata *m;
|
199
321
|
re2_pattern *p;
|
@@ -256,9 +378,7 @@ extern "C" {
|
|
256
378
|
* m["number"] #=> "123"
|
257
379
|
* m[:number] #=> "123"
|
258
380
|
*/
|
259
|
-
static VALUE
|
260
|
-
re2_matchdata_aref(int argc, VALUE *argv, VALUE self)
|
261
|
-
{
|
381
|
+
static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
|
262
382
|
VALUE idx, rest;
|
263
383
|
rb_scan_args(argc, argv, "11", &idx, &rest);
|
264
384
|
|
@@ -278,9 +398,7 @@ extern "C" {
|
|
278
398
|
*
|
279
399
|
* @return [String] the entire matched string
|
280
400
|
*/
|
281
|
-
static VALUE
|
282
|
-
re2_matchdata_to_s(VALUE self)
|
283
|
-
{
|
401
|
+
static VALUE re2_matchdata_to_s(VALUE self) {
|
284
402
|
return re2_matchdata_nth_match(0, self);
|
285
403
|
}
|
286
404
|
|
@@ -292,17 +410,15 @@ extern "C" {
|
|
292
410
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
293
411
|
* m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
|
294
412
|
*/
|
295
|
-
static VALUE
|
296
|
-
re2_matchdata_inspect(VALUE self)
|
297
|
-
{
|
413
|
+
static VALUE re2_matchdata_inspect(VALUE self) {
|
298
414
|
int i;
|
299
415
|
re2_matchdata *m;
|
416
|
+
re2_pattern *p;
|
300
417
|
VALUE match, result;
|
301
418
|
ostringstream output;
|
302
419
|
|
303
420
|
Data_Get_Struct(self, re2_matchdata, m);
|
304
|
-
|
305
|
-
result = rb_str_new("#<RE2::MatchData", 16);
|
421
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
306
422
|
|
307
423
|
output << "#<RE2::MatchData";
|
308
424
|
|
@@ -324,7 +440,8 @@ extern "C" {
|
|
324
440
|
|
325
441
|
output << ">";
|
326
442
|
|
327
|
-
result =
|
443
|
+
result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
|
444
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
|
328
445
|
|
329
446
|
return result;
|
330
447
|
}
|
@@ -339,9 +456,7 @@ extern "C" {
|
|
339
456
|
* @see RE2::Regexp.new
|
340
457
|
*
|
341
458
|
*/
|
342
|
-
static VALUE
|
343
|
-
re2_re2(int argc, VALUE *argv, VALUE self)
|
344
|
-
{
|
459
|
+
static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
|
345
460
|
UNUSED(self);
|
346
461
|
return rb_class_new_instance(argc, argv, re2_cRegexp);
|
347
462
|
}
|
@@ -358,7 +473,8 @@ extern "C" {
|
|
358
473
|
*
|
359
474
|
* @param [String] pattern the pattern to compile
|
360
475
|
* @return [RE2::Regexp] an RE2::Regexp with the specified pattern
|
361
|
-
* @raise [NoMemoryError] if memory could not be allocated for the compiled
|
476
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled
|
477
|
+
* pattern
|
362
478
|
*
|
363
479
|
* @overload initialize(pattern, options)
|
364
480
|
* Returns a new {RE2::Regexp} object with a compiled version of
|
@@ -380,11 +496,9 @@ extern "C" {
|
|
380
496
|
* @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
|
381
497
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
382
498
|
*/
|
383
|
-
static VALUE
|
384
|
-
re2_regexp_initialize(int argc, VALUE *argv, VALUE self)
|
385
|
-
{
|
499
|
+
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
386
500
|
VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors,
|
387
|
-
max_mem, literal, never_nl, case_sensitive, perl_classes,
|
501
|
+
max_mem, literal, never_nl, case_sensitive, perl_classes,
|
388
502
|
word_boundary, one_line;
|
389
503
|
re2_pattern *p;
|
390
504
|
|
@@ -453,9 +567,9 @@ extern "C" {
|
|
453
567
|
re2_options.set_one_line(RTEST(one_line));
|
454
568
|
}
|
455
569
|
|
456
|
-
p->pattern = new
|
570
|
+
p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
|
457
571
|
} else {
|
458
|
-
p->pattern = new
|
572
|
+
p->pattern = new(nothrow) RE2(StringValuePtr(pattern));
|
459
573
|
}
|
460
574
|
|
461
575
|
if (p->pattern == 0) {
|
@@ -473,9 +587,7 @@ extern "C" {
|
|
473
587
|
* re2 = RE2::Regexp.new("woo?")
|
474
588
|
* re2.inspect #=> "#<RE2::Regexp /woo?/>"
|
475
589
|
*/
|
476
|
-
static VALUE
|
477
|
-
re2_regexp_inspect(VALUE self)
|
478
|
-
{
|
590
|
+
static VALUE re2_regexp_inspect(VALUE self) {
|
479
591
|
re2_pattern *p;
|
480
592
|
VALUE result;
|
481
593
|
ostringstream output;
|
@@ -484,7 +596,8 @@ extern "C" {
|
|
484
596
|
|
485
597
|
output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
|
486
598
|
|
487
|
-
result =
|
599
|
+
result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
|
600
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
|
488
601
|
|
489
602
|
return result;
|
490
603
|
}
|
@@ -497,12 +610,12 @@ extern "C" {
|
|
497
610
|
* re2 = RE2::Regexp.new("woo?")
|
498
611
|
* re2.to_s #=> "woo?"
|
499
612
|
*/
|
500
|
-
static VALUE
|
501
|
-
re2_regexp_to_s(VALUE self)
|
502
|
-
{
|
613
|
+
static VALUE re2_regexp_to_s(VALUE self) {
|
503
614
|
re2_pattern *p;
|
504
615
|
Data_Get_Struct(self, re2_pattern, p);
|
505
|
-
return
|
616
|
+
return ENCODED_STR_NEW(p->pattern->pattern().data(),
|
617
|
+
p->pattern->pattern().size(),
|
618
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
|
506
619
|
}
|
507
620
|
|
508
621
|
/*
|
@@ -514,9 +627,7 @@ extern "C" {
|
|
514
627
|
* re2 = RE2::Regexp.new("woo?")
|
515
628
|
* re2.ok? #=> true
|
516
629
|
*/
|
517
|
-
static VALUE
|
518
|
-
re2_regexp_ok(VALUE self)
|
519
|
-
{
|
630
|
+
static VALUE re2_regexp_ok(VALUE self) {
|
520
631
|
re2_pattern *p;
|
521
632
|
Data_Get_Struct(self, re2_pattern, p);
|
522
633
|
return BOOL2RUBY(p->pattern->ok());
|
@@ -531,9 +642,7 @@ extern "C" {
|
|
531
642
|
* re2 = RE2::Regexp.new("woo?", :utf8 => true)
|
532
643
|
* re2.utf8? #=> true
|
533
644
|
*/
|
534
|
-
static VALUE
|
535
|
-
re2_regexp_utf8(VALUE self)
|
536
|
-
{
|
645
|
+
static VALUE re2_regexp_utf8(VALUE self) {
|
537
646
|
re2_pattern *p;
|
538
647
|
Data_Get_Struct(self, re2_pattern, p);
|
539
648
|
return BOOL2RUBY(p->pattern->options().utf8());
|
@@ -548,9 +657,7 @@ extern "C" {
|
|
548
657
|
* re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
|
549
658
|
* re2.posix_syntax? #=> true
|
550
659
|
*/
|
551
|
-
static VALUE
|
552
|
-
re2_regexp_posix_syntax(VALUE self)
|
553
|
-
{
|
660
|
+
static VALUE re2_regexp_posix_syntax(VALUE self) {
|
554
661
|
re2_pattern *p;
|
555
662
|
Data_Get_Struct(self, re2_pattern, p);
|
556
663
|
return BOOL2RUBY(p->pattern->options().posix_syntax());
|
@@ -565,9 +672,7 @@ extern "C" {
|
|
565
672
|
* re2 = RE2::Regexp.new("woo?", :longest_match => true)
|
566
673
|
* re2.longest_match? #=> true
|
567
674
|
*/
|
568
|
-
static VALUE
|
569
|
-
re2_regexp_longest_match(VALUE self)
|
570
|
-
{
|
675
|
+
static VALUE re2_regexp_longest_match(VALUE self) {
|
571
676
|
re2_pattern *p;
|
572
677
|
Data_Get_Struct(self, re2_pattern, p);
|
573
678
|
return BOOL2RUBY(p->pattern->options().longest_match());
|
@@ -582,9 +687,7 @@ extern "C" {
|
|
582
687
|
* re2 = RE2::Regexp.new("woo?", :log_errors => true)
|
583
688
|
* re2.log_errors? #=> true
|
584
689
|
*/
|
585
|
-
static VALUE
|
586
|
-
re2_regexp_log_errors(VALUE self)
|
587
|
-
{
|
690
|
+
static VALUE re2_regexp_log_errors(VALUE self) {
|
588
691
|
re2_pattern *p;
|
589
692
|
Data_Get_Struct(self, re2_pattern, p);
|
590
693
|
return BOOL2RUBY(p->pattern->options().log_errors());
|
@@ -599,9 +702,7 @@ extern "C" {
|
|
599
702
|
* re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
|
600
703
|
* re2.max_mem #=> 1024
|
601
704
|
*/
|
602
|
-
static VALUE
|
603
|
-
re2_regexp_max_mem(VALUE self)
|
604
|
-
{
|
705
|
+
static VALUE re2_regexp_max_mem(VALUE self) {
|
605
706
|
re2_pattern *p;
|
606
707
|
Data_Get_Struct(self, re2_pattern, p);
|
607
708
|
return INT2FIX(p->pattern->options().max_mem());
|
@@ -616,9 +717,7 @@ extern "C" {
|
|
616
717
|
* re2 = RE2::Regexp.new("woo?", :literal => true)
|
617
718
|
* re2.literal? #=> true
|
618
719
|
*/
|
619
|
-
static VALUE
|
620
|
-
re2_regexp_literal(VALUE self)
|
621
|
-
{
|
720
|
+
static VALUE re2_regexp_literal(VALUE self) {
|
622
721
|
re2_pattern *p;
|
623
722
|
Data_Get_Struct(self, re2_pattern, p);
|
624
723
|
return BOOL2RUBY(p->pattern->options().literal());
|
@@ -633,9 +732,7 @@ extern "C" {
|
|
633
732
|
* re2 = RE2::Regexp.new("woo?", :never_nl => true)
|
634
733
|
* re2.never_nl? #=> true
|
635
734
|
*/
|
636
|
-
static VALUE
|
637
|
-
re2_regexp_never_nl(VALUE self)
|
638
|
-
{
|
735
|
+
static VALUE re2_regexp_never_nl(VALUE self) {
|
639
736
|
re2_pattern *p;
|
640
737
|
Data_Get_Struct(self, re2_pattern, p);
|
641
738
|
return BOOL2RUBY(p->pattern->options().never_nl());
|
@@ -650,9 +747,7 @@ extern "C" {
|
|
650
747
|
* re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
|
651
748
|
* re2.case_sensitive? #=> true
|
652
749
|
*/
|
653
|
-
static VALUE
|
654
|
-
re2_regexp_case_sensitive(VALUE self)
|
655
|
-
{
|
750
|
+
static VALUE re2_regexp_case_sensitive(VALUE self) {
|
656
751
|
re2_pattern *p;
|
657
752
|
Data_Get_Struct(self, re2_pattern, p);
|
658
753
|
return BOOL2RUBY(p->pattern->options().case_sensitive());
|
@@ -668,9 +763,7 @@ extern "C" {
|
|
668
763
|
* re2.case_insensitive? #=> false
|
669
764
|
* re2.casefold? #=> false
|
670
765
|
*/
|
671
|
-
static VALUE
|
672
|
-
re2_regexp_case_insensitive(VALUE self)
|
673
|
-
{
|
766
|
+
static VALUE re2_regexp_case_insensitive(VALUE self) {
|
674
767
|
return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
|
675
768
|
}
|
676
769
|
|
@@ -683,9 +776,7 @@ extern "C" {
|
|
683
776
|
* re2 = RE2::Regexp.new("woo?", :perl_classes => true)
|
684
777
|
* re2.perl_classes? #=> true
|
685
778
|
*/
|
686
|
-
static VALUE
|
687
|
-
re2_regexp_perl_classes(VALUE self)
|
688
|
-
{
|
779
|
+
static VALUE re2_regexp_perl_classes(VALUE self) {
|
689
780
|
re2_pattern *p;
|
690
781
|
Data_Get_Struct(self, re2_pattern, p);
|
691
782
|
return BOOL2RUBY(p->pattern->options().perl_classes());
|
@@ -700,9 +791,7 @@ extern "C" {
|
|
700
791
|
* re2 = RE2::Regexp.new("woo?", :word_boundary => true)
|
701
792
|
* re2.word_boundary? #=> true
|
702
793
|
*/
|
703
|
-
static VALUE
|
704
|
-
re2_regexp_word_boundary(VALUE self)
|
705
|
-
{
|
794
|
+
static VALUE re2_regexp_word_boundary(VALUE self) {
|
706
795
|
re2_pattern *p;
|
707
796
|
Data_Get_Struct(self, re2_pattern, p);
|
708
797
|
return BOOL2RUBY(p->pattern->options().word_boundary());
|
@@ -717,9 +806,7 @@ extern "C" {
|
|
717
806
|
* re2 = RE2::Regexp.new("woo?", :one_line => true)
|
718
807
|
* re2.one_line? #=> true
|
719
808
|
*/
|
720
|
-
static VALUE
|
721
|
-
re2_regexp_one_line(VALUE self)
|
722
|
-
{
|
809
|
+
static VALUE re2_regexp_one_line(VALUE self) {
|
723
810
|
re2_pattern *p;
|
724
811
|
Data_Get_Struct(self, re2_pattern, p);
|
725
812
|
return BOOL2RUBY(p->pattern->options().one_line());
|
@@ -731,9 +818,7 @@ extern "C" {
|
|
731
818
|
*
|
732
819
|
* @return [String, nil] the error string or nil
|
733
820
|
*/
|
734
|
-
static VALUE
|
735
|
-
re2_regexp_error(VALUE self)
|
736
|
-
{
|
821
|
+
static VALUE re2_regexp_error(VALUE self) {
|
737
822
|
re2_pattern *p;
|
738
823
|
Data_Get_Struct(self, re2_pattern, p);
|
739
824
|
if (p->pattern->ok()) {
|
@@ -749,15 +834,15 @@ extern "C" {
|
|
749
834
|
*
|
750
835
|
* @return [String, nil] the offending portion of the regexp or nil
|
751
836
|
*/
|
752
|
-
static VALUE
|
753
|
-
re2_regexp_error_arg(VALUE self)
|
754
|
-
{
|
837
|
+
static VALUE re2_regexp_error_arg(VALUE self) {
|
755
838
|
re2_pattern *p;
|
756
839
|
Data_Get_Struct(self, re2_pattern, p);
|
757
840
|
if (p->pattern->ok()) {
|
758
841
|
return Qnil;
|
759
842
|
} else {
|
760
|
-
return
|
843
|
+
return ENCODED_STR_NEW(p->pattern->error_arg().data(),
|
844
|
+
p->pattern->error_arg().size(),
|
845
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
|
761
846
|
}
|
762
847
|
}
|
763
848
|
|
@@ -768,9 +853,7 @@ extern "C" {
|
|
768
853
|
*
|
769
854
|
* @return [Fixnum] the regexp "cost"
|
770
855
|
*/
|
771
|
-
static VALUE
|
772
|
-
re2_regexp_program_size(VALUE self)
|
773
|
-
{
|
856
|
+
static VALUE re2_regexp_program_size(VALUE self) {
|
774
857
|
re2_pattern *p;
|
775
858
|
Data_Get_Struct(self, re2_pattern, p);
|
776
859
|
return INT2FIX(p->pattern->ProgramSize());
|
@@ -782,9 +865,7 @@ extern "C" {
|
|
782
865
|
*
|
783
866
|
* @return [Hash] the options
|
784
867
|
*/
|
785
|
-
static VALUE
|
786
|
-
re2_regexp_options(VALUE self)
|
787
|
-
{
|
868
|
+
static VALUE re2_regexp_options(VALUE self) {
|
788
869
|
VALUE options;
|
789
870
|
re2_pattern *p;
|
790
871
|
|
@@ -837,9 +918,7 @@ extern "C" {
|
|
837
918
|
*
|
838
919
|
* @return [Fixnum] the number of capturing subpatterns
|
839
920
|
*/
|
840
|
-
static VALUE
|
841
|
-
re2_regexp_number_of_capturing_groups(VALUE self)
|
842
|
-
{
|
921
|
+
static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
|
843
922
|
re2_pattern *p;
|
844
923
|
|
845
924
|
Data_Get_Struct(self, re2_pattern, p);
|
@@ -851,9 +930,7 @@ extern "C" {
|
|
851
930
|
*
|
852
931
|
* @return [Hash] a hash of names to capturing indices
|
853
932
|
*/
|
854
|
-
static VALUE
|
855
|
-
re2_regexp_named_capturing_groups(VALUE self)
|
856
|
-
{
|
933
|
+
static VALUE re2_regexp_named_capturing_groups(VALUE self) {
|
857
934
|
VALUE capturing_groups;
|
858
935
|
re2_pattern *p;
|
859
936
|
map<string, int> groups;
|
@@ -865,7 +942,8 @@ extern "C" {
|
|
865
942
|
|
866
943
|
for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
|
867
944
|
rb_hash_aset(capturing_groups,
|
868
|
-
|
945
|
+
ENCODED_STR_NEW(iterator->first.data(), iterator->first.size(),
|
946
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"),
|
869
947
|
INT2FIX(iterator->second));
|
870
948
|
}
|
871
949
|
|
@@ -916,9 +994,7 @@ extern "C" {
|
|
916
994
|
* r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
|
917
995
|
* r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
|
918
996
|
*/
|
919
|
-
static VALUE
|
920
|
-
re2_regexp_match(int argc, VALUE *argv, VALUE self)
|
921
|
-
{
|
997
|
+
static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
922
998
|
int n;
|
923
999
|
bool matched;
|
924
1000
|
re2_pattern *p;
|
@@ -936,7 +1012,8 @@ extern "C" {
|
|
936
1012
|
}
|
937
1013
|
|
938
1014
|
if (n == 0) {
|
939
|
-
matched = match(p->pattern, StringValuePtr(text), 0,
|
1015
|
+
matched = match(p->pattern, StringValuePtr(text), 0,
|
1016
|
+
static_cast<int>(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0);
|
940
1017
|
return BOOL2RUBY(matched);
|
941
1018
|
} else {
|
942
1019
|
|
@@ -945,18 +1022,21 @@ extern "C" {
|
|
945
1022
|
|
946
1023
|
matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
|
947
1024
|
Data_Get_Struct(matchdata, re2_matchdata, m);
|
948
|
-
m->matches = new
|
1025
|
+
m->matches = new(nothrow) re2::StringPiece[n];
|
949
1026
|
m->regexp = self;
|
950
1027
|
m->text = rb_str_dup(text);
|
951
1028
|
rb_str_freeze(m->text);
|
952
1029
|
|
953
1030
|
if (m->matches == 0) {
|
954
|
-
rb_raise(rb_eNoMemError,
|
1031
|
+
rb_raise(rb_eNoMemError,
|
1032
|
+
"not enough memory to allocate StringPieces for matches");
|
955
1033
|
}
|
956
1034
|
|
957
1035
|
m->number_of_matches = n;
|
958
1036
|
|
959
|
-
matched = match(p->pattern, StringValuePtr(text), 0,
|
1037
|
+
matched = match(p->pattern, StringValuePtr(text), 0,
|
1038
|
+
static_cast<int>(RSTRING_LEN(text)),
|
1039
|
+
RE2::UNANCHORED, m->matches, n);
|
960
1040
|
|
961
1041
|
if (matched) {
|
962
1042
|
return matchdata;
|
@@ -972,9 +1052,7 @@ extern "C" {
|
|
972
1052
|
*
|
973
1053
|
* @return [Boolean] whether the match was successful
|
974
1054
|
*/
|
975
|
-
static VALUE
|
976
|
-
re2_regexp_match_query(VALUE self, VALUE text)
|
977
|
-
{
|
1055
|
+
static VALUE re2_regexp_match_query(VALUE self, VALUE text) {
|
978
1056
|
VALUE argv[2];
|
979
1057
|
argv[0] = text;
|
980
1058
|
argv[1] = INT2FIX(0);
|
@@ -983,7 +1061,30 @@ extern "C" {
|
|
983
1061
|
}
|
984
1062
|
|
985
1063
|
/*
|
986
|
-
*
|
1064
|
+
* Returns a {RE2::Consumer} for scanning the given text incrementally.
|
1065
|
+
*
|
1066
|
+
* @example
|
1067
|
+
* c = RE2::Regexp.new('(\w+)').consume("Foo bar baz")
|
1068
|
+
*/
|
1069
|
+
static VALUE re2_regexp_consume(VALUE self, VALUE text) {
|
1070
|
+
re2_pattern *p;
|
1071
|
+
re2_consumer *c;
|
1072
|
+
VALUE consumer;
|
1073
|
+
re2::StringPiece input(RSTRING_PTR(text));
|
1074
|
+
|
1075
|
+
Data_Get_Struct(self, re2_pattern, p);
|
1076
|
+
consumer = rb_class_new_instance(0, 0, re2_cConsumer);
|
1077
|
+
Data_Get_Struct(consumer, re2_consumer, c);
|
1078
|
+
c->input = input;
|
1079
|
+
c->regexp = self;
|
1080
|
+
c->text = text;
|
1081
|
+
c->argc = p->pattern->NumberOfCapturingGroups();
|
1082
|
+
|
1083
|
+
return consumer;
|
1084
|
+
}
|
1085
|
+
|
1086
|
+
/*
|
1087
|
+
* Replaces the first occurrence +pattern+ in +str+ with
|
987
1088
|
* +rewrite+ <i>in place</i>.
|
988
1089
|
*
|
989
1090
|
* @param [String] str the string to modify
|
@@ -996,11 +1097,10 @@ extern "C" {
|
|
996
1097
|
* RE2.Replace("hello there", re2, "yo") #=> "yo there"
|
997
1098
|
* text = "Good morning"
|
998
1099
|
* RE2.Replace(text, "morn", "even") #=> "Good evening"
|
999
|
-
* text
|
1100
|
+
* text #=> "Good evening"
|
1000
1101
|
*/
|
1001
|
-
static VALUE
|
1002
|
-
|
1003
|
-
{
|
1102
|
+
static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
|
1103
|
+
VALUE rewrite) {
|
1004
1104
|
|
1005
1105
|
/* Look out for frozen strings. */
|
1006
1106
|
rb_check_frozen(str);
|
@@ -1017,7 +1117,8 @@ extern "C" {
|
|
1017
1117
|
Data_Get_Struct(pattern, re2_pattern, p);
|
1018
1118
|
RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
|
1019
1119
|
} else {
|
1020
|
-
RE2::Replace(&str_as_string, StringValuePtr(pattern),
|
1120
|
+
RE2::Replace(&str_as_string, StringValuePtr(pattern),
|
1121
|
+
StringValuePtr(rewrite));
|
1021
1122
|
}
|
1022
1123
|
|
1023
1124
|
/* Save the replacement as a VALUE. */
|
@@ -1033,7 +1134,7 @@ extern "C" {
|
|
1033
1134
|
}
|
1034
1135
|
|
1035
1136
|
/*
|
1036
|
-
* Replaces every occurrence of +pattern+ in +str+ with
|
1137
|
+
* Replaces every occurrence of +pattern+ in +str+ with
|
1037
1138
|
* +rewrite+ <i>in place</i>.
|
1038
1139
|
*
|
1039
1140
|
* @param [String] str the string to modify
|
@@ -1048,9 +1149,8 @@ extern "C" {
|
|
1048
1149
|
* RE2.GlobalReplace(text, "o", "ee") #=> "Geeeed meerning"
|
1049
1150
|
* text #=> "Geeeed meerning"
|
1050
1151
|
*/
|
1051
|
-
static VALUE
|
1052
|
-
|
1053
|
-
{
|
1152
|
+
static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
|
1153
|
+
VALUE rewrite) {
|
1054
1154
|
|
1055
1155
|
/* Look out for frozen strings. */
|
1056
1156
|
rb_check_frozen(str);
|
@@ -1067,7 +1167,8 @@ extern "C" {
|
|
1067
1167
|
Data_Get_Struct(pattern, re2_pattern, p);
|
1068
1168
|
RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
|
1069
1169
|
} else {
|
1070
|
-
RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
|
1170
|
+
RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
|
1171
|
+
StringValuePtr(rewrite));
|
1071
1172
|
}
|
1072
1173
|
|
1073
1174
|
/* Save the replacement as a VALUE. */
|
@@ -1092,70 +1193,121 @@ extern "C" {
|
|
1092
1193
|
* @example
|
1093
1194
|
* RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
|
1094
1195
|
*/
|
1095
|
-
static VALUE
|
1096
|
-
re2_QuoteMeta(VALUE self, VALUE unquoted)
|
1097
|
-
{
|
1196
|
+
static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
|
1098
1197
|
UNUSED(self);
|
1099
1198
|
string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
|
1100
1199
|
return rb_str_new(quoted_string.data(), quoted_string.size());
|
1101
1200
|
}
|
1102
1201
|
|
1103
|
-
void
|
1104
|
-
Init_re2()
|
1105
|
-
{
|
1202
|
+
void Init_re2(void) {
|
1106
1203
|
re2_mRE2 = rb_define_module("RE2");
|
1107
1204
|
re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
|
1108
1205
|
re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
|
1206
|
+
re2_cConsumer = rb_define_class_under(re2_mRE2, "Consumer", rb_cObject);
|
1109
1207
|
|
1110
1208
|
rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
|
1111
|
-
rb_define_alloc_func(re2_cMatchData,
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1115
|
-
|
1116
|
-
rb_define_method(re2_cMatchData, "
|
1117
|
-
|
1118
|
-
rb_define_method(re2_cMatchData, "
|
1119
|
-
|
1120
|
-
rb_define_method(re2_cMatchData, "
|
1121
|
-
|
1122
|
-
rb_define_method(
|
1209
|
+
rb_define_alloc_func(re2_cMatchData,
|
1210
|
+
(VALUE (*)(VALUE))re2_matchdata_allocate);
|
1211
|
+
rb_define_alloc_func(re2_cConsumer,
|
1212
|
+
(VALUE (*)(VALUE))re2_consumer_allocate);
|
1213
|
+
|
1214
|
+
rb_define_method(re2_cMatchData, "string",
|
1215
|
+
RUBY_METHOD_FUNC(re2_matchdata_string), 0);
|
1216
|
+
rb_define_method(re2_cMatchData, "regexp",
|
1217
|
+
RUBY_METHOD_FUNC(re2_matchdata_regexp), 0);
|
1218
|
+
rb_define_method(re2_cMatchData, "to_a",
|
1219
|
+
RUBY_METHOD_FUNC(re2_matchdata_to_a), 0);
|
1220
|
+
rb_define_method(re2_cMatchData, "size",
|
1221
|
+
RUBY_METHOD_FUNC(re2_matchdata_size), 0);
|
1222
|
+
rb_define_method(re2_cMatchData, "length",
|
1223
|
+
RUBY_METHOD_FUNC(re2_matchdata_size), 0);
|
1224
|
+
rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
|
1225
|
+
-1); rb_define_method(re2_cMatchData, "to_s",
|
1226
|
+
RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
|
1227
|
+
rb_define_method(re2_cMatchData, "inspect",
|
1228
|
+
RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
|
1229
|
+
|
1230
|
+
rb_define_method(re2_cConsumer, "string",
|
1231
|
+
RUBY_METHOD_FUNC(re2_consumer_string), 0);
|
1232
|
+
rb_define_method(re2_cConsumer, "regexp",
|
1233
|
+
RUBY_METHOD_FUNC(re2_consumer_regexp), 0);
|
1234
|
+
rb_define_method(re2_cConsumer, "consume",
|
1235
|
+
RUBY_METHOD_FUNC(re2_consumer_consume), 0);
|
1236
|
+
rb_define_method(re2_cConsumer, "rewind",
|
1237
|
+
RUBY_METHOD_FUNC(re2_consumer_rewind), 0);
|
1238
|
+
|
1239
|
+
rb_define_method(re2_cRegexp, "initialize",
|
1240
|
+
RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
|
1123
1241
|
rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
|
1124
|
-
rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
|
1125
|
-
|
1126
|
-
rb_define_method(re2_cRegexp, "
|
1127
|
-
|
1128
|
-
rb_define_method(re2_cRegexp, "
|
1129
|
-
|
1130
|
-
rb_define_method(re2_cRegexp, "
|
1131
|
-
|
1132
|
-
rb_define_method(re2_cRegexp, "
|
1133
|
-
|
1242
|
+
rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
|
1243
|
+
0);
|
1244
|
+
rb_define_method(re2_cRegexp, "error_arg",
|
1245
|
+
RUBY_METHOD_FUNC(re2_regexp_error_arg), 0);
|
1246
|
+
rb_define_method(re2_cRegexp, "program_size",
|
1247
|
+
RUBY_METHOD_FUNC(re2_regexp_program_size), 0);
|
1248
|
+
rb_define_method(re2_cRegexp, "options",
|
1249
|
+
RUBY_METHOD_FUNC(re2_regexp_options), 0);
|
1250
|
+
rb_define_method(re2_cRegexp, "number_of_capturing_groups",
|
1251
|
+
RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
|
1252
|
+
rb_define_method(re2_cRegexp, "named_capturing_groups",
|
1253
|
+
RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
|
1254
|
+
rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
|
1255
|
+
-1);
|
1256
|
+
rb_define_method(re2_cRegexp, "match?",
|
1257
|
+
RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
|
1258
|
+
rb_define_method(re2_cRegexp, "=~",
|
1259
|
+
RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
|
1260
|
+
rb_define_method(re2_cRegexp, "===",
|
1261
|
+
RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
|
1262
|
+
rb_define_method(re2_cRegexp, "consume", RUBY_METHOD_FUNC(re2_regexp_consume), 1);
|
1134
1263
|
rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
|
1135
|
-
rb_define_method(re2_cRegexp, "to_str", RUBY_METHOD_FUNC(re2_regexp_to_s),
|
1136
|
-
|
1137
|
-
rb_define_method(re2_cRegexp, "
|
1138
|
-
|
1139
|
-
rb_define_method(re2_cRegexp, "
|
1140
|
-
|
1141
|
-
rb_define_method(re2_cRegexp, "
|
1142
|
-
|
1143
|
-
rb_define_method(re2_cRegexp, "
|
1144
|
-
|
1145
|
-
rb_define_method(re2_cRegexp, "
|
1146
|
-
|
1147
|
-
rb_define_method(re2_cRegexp, "
|
1148
|
-
|
1149
|
-
rb_define_method(re2_cRegexp, "
|
1150
|
-
|
1151
|
-
rb_define_method(re2_cRegexp, "
|
1152
|
-
|
1153
|
-
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1264
|
+
rb_define_method(re2_cRegexp, "to_str", RUBY_METHOD_FUNC(re2_regexp_to_s),
|
1265
|
+
0);
|
1266
|
+
rb_define_method(re2_cRegexp, "pattern", RUBY_METHOD_FUNC(re2_regexp_to_s),
|
1267
|
+
0);
|
1268
|
+
rb_define_method(re2_cRegexp, "source", RUBY_METHOD_FUNC(re2_regexp_to_s),
|
1269
|
+
0);
|
1270
|
+
rb_define_method(re2_cRegexp, "inspect",
|
1271
|
+
RUBY_METHOD_FUNC(re2_regexp_inspect), 0);
|
1272
|
+
rb_define_method(re2_cRegexp, "utf8?", RUBY_METHOD_FUNC(re2_regexp_utf8),
|
1273
|
+
0);
|
1274
|
+
rb_define_method(re2_cRegexp, "posix_syntax?",
|
1275
|
+
RUBY_METHOD_FUNC(re2_regexp_posix_syntax), 0);
|
1276
|
+
rb_define_method(re2_cRegexp, "longest_match?",
|
1277
|
+
RUBY_METHOD_FUNC(re2_regexp_longest_match), 0);
|
1278
|
+
rb_define_method(re2_cRegexp, "log_errors?",
|
1279
|
+
RUBY_METHOD_FUNC(re2_regexp_log_errors), 0);
|
1280
|
+
rb_define_method(re2_cRegexp, "max_mem",
|
1281
|
+
RUBY_METHOD_FUNC(re2_regexp_max_mem), 0);
|
1282
|
+
rb_define_method(re2_cRegexp, "literal?",
|
1283
|
+
RUBY_METHOD_FUNC(re2_regexp_literal), 0);
|
1284
|
+
rb_define_method(re2_cRegexp, "never_nl?",
|
1285
|
+
RUBY_METHOD_FUNC(re2_regexp_never_nl), 0);
|
1286
|
+
rb_define_method(re2_cRegexp, "case_sensitive?",
|
1287
|
+
RUBY_METHOD_FUNC(re2_regexp_case_sensitive), 0);
|
1288
|
+
rb_define_method(re2_cRegexp, "case_insensitive?",
|
1289
|
+
RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
|
1290
|
+
rb_define_method(re2_cRegexp, "casefold?",
|
1291
|
+
RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
|
1292
|
+
rb_define_method(re2_cRegexp, "perl_classes?",
|
1293
|
+
RUBY_METHOD_FUNC(re2_regexp_perl_classes), 0);
|
1294
|
+
rb_define_method(re2_cRegexp, "word_boundary?",
|
1295
|
+
RUBY_METHOD_FUNC(re2_regexp_word_boundary), 0);
|
1296
|
+
rb_define_method(re2_cRegexp, "one_line?",
|
1297
|
+
RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
|
1298
|
+
|
1299
|
+
rb_define_module_function(re2_mRE2, "Replace",
|
1300
|
+
RUBY_METHOD_FUNC(re2_Replace), 3);
|
1301
|
+
rb_define_module_function(re2_mRE2, "GlobalReplace",
|
1302
|
+
RUBY_METHOD_FUNC(re2_GlobalReplace), 3);
|
1303
|
+
rb_define_module_function(re2_mRE2, "QuoteMeta",
|
1304
|
+
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
1305
|
+
rb_define_singleton_method(re2_cRegexp, "escape",
|
1306
|
+
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
1307
|
+
rb_define_singleton_method(re2_cRegexp, "quote",
|
1308
|
+
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
1309
|
+
rb_define_singleton_method(re2_cRegexp, "compile",
|
1310
|
+
RUBY_METHOD_FUNC(rb_class_new_instance), -1);
|
1159
1311
|
|
1160
1312
|
rb_define_global_function("RE2", RUBY_METHOD_FUNC(re2_re2), -1);
|
1161
1313
|
|