re2 0.5.0 → 0.6.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +1 -1
- data/README.md +71 -19
- data/Rakefile +1 -3
- data/ext/re2/re2.cc +353 -201
- data/lib/re2.rb +7 -0
- data/lib/re2/consumer.rb +15 -0
- data/lib/re2/string.rb +1 -1
- data/spec/re2/consumer_spec.rb +90 -0
- data/spec/re2/match_data_spec.rb +16 -0
- data/spec/re2/regexp_spec.rb +9 -0
- data/spec/re2/string_spec.rb +2 -2
- data/spec/re2_spec.rb +8 -0
- metadata +26 -19
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 65fc1ca01f0f974602264152462be26110297250
|
4
|
+
data.tar.gz: e5fdd3a7c238d629d75d76032b557658f993553b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cbc9be6aef0659c89a13c0481b951467f009702e13cd4c6c426d4851c563ad16888959e2c878c79090e221909cd18f391b2bf3026b3cdaaf9d86317884e11c07
|
7
|
+
data.tar.gz: 94b8ea6f3f3cf1b353fa82c70611e2623c93fea6c189f1bac02435c8bf3a5cacfde032ad95b5a855ea4e1c63f3f94a0b66b1174ad42a269e6e5d6ed4b9793df3
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,34 +1,45 @@
|
|
1
1
|
re2 [![Build Status](https://secure.travis-ci.org/mudge/re2.png?branch=master)](http://travis-ci.org/mudge/re2)
|
2
2
|
===
|
3
3
|
|
4
|
-
A Ruby binding to [re2][], an "efficient, principled regular expression
|
4
|
+
A Ruby binding to [re2][], an "efficient, principled regular expression
|
5
|
+
library".
|
5
6
|
|
6
7
|
Installation
|
7
8
|
------------
|
8
9
|
|
9
|
-
You will need [re2][] installed as well as a C++ compiler such as [gcc][] (on
|
10
|
+
You will need [re2][] installed as well as a C++ compiler such as [gcc][] (on
|
11
|
+
Debian and Ubuntu, this is provided by the [build-essential][] package). If
|
12
|
+
you are using Mac OS X, I recommend installing re2 with [Homebrew][] by
|
13
|
+
running the following:
|
10
14
|
|
11
|
-
$ brew install
|
15
|
+
$ brew install re2
|
12
16
|
|
13
17
|
If you are using Debian, you can install the [libre2-dev][] package like so:
|
14
18
|
|
15
19
|
$ sudo apt-get install libre2-dev
|
16
20
|
|
17
|
-
If you are using a packaged Ruby distribution, make sure you also have the
|
21
|
+
If you are using a packaged Ruby distribution, make sure you also have the
|
22
|
+
Ruby header files installed such as those provided by the [ruby-dev][] package
|
23
|
+
on Debian and Ubuntu.
|
18
24
|
|
19
|
-
You can then install the library via RubyGems with `gem install re2` or `gem
|
25
|
+
You can then install the library via RubyGems with `gem install re2` or `gem
|
26
|
+
install re2 -- --with-re2-dir=/opt/local/re2` if re2 is not installed in the
|
27
|
+
default location of `/usr/local/`.
|
20
28
|
|
21
29
|
Documentation
|
22
30
|
-------------
|
23
31
|
|
24
|
-
Full documentation automatically generated from the latest version is
|
32
|
+
Full documentation automatically generated from the latest version is
|
33
|
+
available at <http://rubydoc.info/github/mudge/re2>.
|
25
34
|
|
26
|
-
Bear in mind that re2's regular expression syntax differs from PCRE, see the
|
35
|
+
Bear in mind that re2's regular expression syntax differs from PCRE, see the
|
36
|
+
[official syntax page][] for more details.
|
27
37
|
|
28
38
|
Usage
|
29
39
|
-----
|
30
40
|
|
31
|
-
You can use re2 as a mostly drop-in replacement for Ruby's own [Regexp][] and
|
41
|
+
You can use re2 as a mostly drop-in replacement for Ruby's own [Regexp][] and
|
42
|
+
[MatchData][] classes:
|
32
43
|
|
33
44
|
```console
|
34
45
|
$ irb -rubygems
|
@@ -49,14 +60,17 @@ $ irb -rubygems
|
|
49
60
|
=> nil
|
50
61
|
```
|
51
62
|
|
52
|
-
As `RE2::Regexp.new` (or `RE2::Regexp.compile`) can be quite verbose, a helper
|
63
|
+
As `RE2::Regexp.new` (or `RE2::Regexp.compile`) can be quite verbose, a helper
|
64
|
+
method has been defined against `Kernel` so you can use a shorter version to
|
65
|
+
create regular expressions:
|
53
66
|
|
54
67
|
```console
|
55
68
|
> RE2('(\d+)')
|
56
69
|
=> #<RE2::Regexp /(\d+)/>
|
57
70
|
```
|
58
71
|
|
59
|
-
Note the use of *single quotes* as double quotes will interpret `\d` as `d` as
|
72
|
+
Note the use of *single quotes* as double quotes will interpret `\d` as `d` as
|
73
|
+
in the following example:
|
60
74
|
|
61
75
|
```console
|
62
76
|
> RE2("(\d+)")
|
@@ -76,7 +90,8 @@ As of 0.3.0, you can use named groups:
|
|
76
90
|
=> "40"
|
77
91
|
```
|
78
92
|
|
79
|
-
As of 0.4.0, you can mix `RE2::String` into strings to provide helpers from
|
93
|
+
As of 0.4.0, you can mix `RE2::String` into strings to provide helpers from
|
94
|
+
the opposite direction:
|
80
95
|
|
81
96
|
```console
|
82
97
|
> require "re2/string"
|
@@ -102,28 +117,64 @@ class String
|
|
102
117
|
end
|
103
118
|
```
|
104
119
|
|
120
|
+
As of 0.5.0, you can use `RE2::Regexp#consume` to incrementally scan text for
|
121
|
+
matches (similar in purpose to Ruby's
|
122
|
+
[`String#scan`](http://ruby-doc.org/core-2.0.0/String.html#method-i-scan)).
|
123
|
+
Calling `consume` will return an `RE2::Consumer` which is
|
124
|
+
[enumerable](http://ruby-doc.org/core-2.0.0/Enumerable.html) meaning you can
|
125
|
+
use `each` to iterate through the matches (and even use
|
126
|
+
[`Enumerator::Lazy`](http://ruby-doc.org/core-2.0/Enumerator/Lazy.html)):
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
re = RE2('(\w+)')
|
130
|
+
consumer = re.consume("It is a truth universally acknowledged")
|
131
|
+
consumer.each do |match|
|
132
|
+
puts match
|
133
|
+
end
|
134
|
+
|
135
|
+
consumer.rewind
|
136
|
+
|
137
|
+
enum = consumer.to_enum
|
138
|
+
enum.next #=> ["It"]
|
139
|
+
enum.next #=> ["is"]
|
140
|
+
```
|
141
|
+
|
105
142
|
Features
|
106
143
|
--------
|
107
144
|
|
108
|
-
* Pre-compiling regular expressions with
|
145
|
+
* Pre-compiling regular expressions with
|
146
|
+
[`RE2::Regexp.new(re)`](http://code.google.com/p/re2/source/browse/re2/re2.h#96),
|
147
|
+
`RE2::Regexp.compile(re)` or `RE2(re)` (including specifying options, e.g.
|
148
|
+
`RE2::Regexp.new("pattern", :case_sensitive => false)`)
|
109
149
|
|
110
|
-
* Extracting matches with `re2.match(text)` (and an exact number of matches
|
150
|
+
* Extracting matches with `re2.match(text)` (and an exact number of matches
|
151
|
+
with `re2.match(text, number_of_matches)` such as `re2.match("123-234", 2)`)
|
111
152
|
|
112
153
|
* Extracting matches by name (both with strings and symbols)
|
113
154
|
|
114
|
-
* Checking for matches with `re2 =~ text`, `re2 === text` (for use in `case`
|
155
|
+
* Checking for matches with `re2 =~ text`, `re2 === text` (for use in `case`
|
156
|
+
statements) and `re2 !~ text`
|
115
157
|
|
116
|
-
*
|
158
|
+
* Incrementally scanning text with `re2.consume(text)`
|
159
|
+
|
160
|
+
* Checking regular expression compilation with `re2.ok?`, `re2.error` and
|
161
|
+
`re2.error_arg`
|
117
162
|
|
118
163
|
* Checking regular expression "cost" with `re2.program_size`
|
119
164
|
|
120
|
-
* Checking the options for an expression with `re2.options` or individually
|
165
|
+
* Checking the options for an expression with `re2.options` or individually
|
166
|
+
with `re2.case_sensitive?`
|
121
167
|
|
122
|
-
* Performing in-place replacement with [`RE2.Replace(str, pattern,
|
168
|
+
* Performing in-place replacement with [`RE2.Replace(str, pattern,
|
169
|
+
replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#335)
|
123
170
|
|
124
|
-
* Performing in-place global replacement with [`RE2.GlobalReplace(str,
|
171
|
+
* Performing in-place global replacement with [`RE2.GlobalReplace(str,
|
172
|
+
pattern,
|
173
|
+
replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#352)
|
125
174
|
|
126
|
-
* Escaping regular expressions with
|
175
|
+
* Escaping regular expressions with
|
176
|
+
[`RE2::Regexp.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377),
|
177
|
+
`RE2::Regexp.quote(unquoted)` or `RE2.QuoteMeta(unquoted)`
|
127
178
|
|
128
179
|
Contact
|
129
180
|
-------
|
@@ -139,3 +190,4 @@ All feedback should go to the mailing list: <mailto:ruby.re2@librelist.com>
|
|
139
190
|
[Homebrew]: http://mxcl.github.com/homebrew
|
140
191
|
[libre2-dev]: http://packages.debian.org/search?keywords=libre2-dev
|
141
192
|
[official syntax page]: http://code.google.com/p/re2/wiki/Syntax
|
193
|
+
|
data/Rakefile
CHANGED
data/ext/re2/re2.cc
CHANGED
@@ -2,18 +2,35 @@
|
|
2
2
|
* re2 (http://github.com/mudge/re2)
|
3
3
|
* Ruby bindings to re2, an "efficient, principled regular expression library"
|
4
4
|
*
|
5
|
-
* Copyright (c) 2010-
|
5
|
+
* Copyright (c) 2010-2013, Paul Mucur (http://mudge.name)
|
6
6
|
* Released under the BSD Licence, please see LICENSE.txt
|
7
7
|
*/
|
8
8
|
|
9
9
|
#include <re2/re2.h>
|
10
|
+
#include <ruby.h>
|
10
11
|
#include <string>
|
11
12
|
#include <sstream>
|
12
|
-
|
13
|
+
#include <vector>
|
14
|
+
using std::string;
|
15
|
+
using std::ostringstream;
|
16
|
+
using std::nothrow;
|
17
|
+
using std::map;
|
18
|
+
using std::vector;
|
13
19
|
|
14
20
|
extern "C" {
|
15
|
-
|
16
|
-
|
21
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
22
|
+
#include <ruby/encoding.h>
|
23
|
+
#define ENCODED_STR_NEW(str, length, encoding) \
|
24
|
+
({ \
|
25
|
+
VALUE _string = rb_str_new((const char *)str, (long)length); \
|
26
|
+
int _enc = rb_enc_find_index((int)encoding); \
|
27
|
+
rb_enc_associate_index(_string, _enc); \
|
28
|
+
_string; \
|
29
|
+
})
|
30
|
+
#else
|
31
|
+
#define ENCODED_STR_NEW(str, length, encoding) \
|
32
|
+
rb_str_new((const char *)str, (long)length)
|
33
|
+
#endif
|
17
34
|
|
18
35
|
#define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
|
19
36
|
#define UNUSED(x) ((void)x)
|
@@ -44,41 +61,57 @@ extern "C" {
|
|
44
61
|
VALUE regexp, text;
|
45
62
|
} re2_matchdata;
|
46
63
|
|
47
|
-
|
64
|
+
typedef struct {
|
65
|
+
re2::StringPiece input;
|
66
|
+
int argc;
|
67
|
+
VALUE regexp, text;
|
68
|
+
} re2_consumer;
|
69
|
+
|
70
|
+
VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cConsumer;
|
48
71
|
|
49
72
|
/* Symbols used in RE2 options. */
|
50
73
|
static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
51
74
|
id_max_mem, id_literal, id_never_nl, id_case_sensitive,
|
52
75
|
id_perl_classes, id_word_boundary, id_one_line;
|
53
76
|
|
54
|
-
void re2_matchdata_mark(re2_matchdata* self)
|
55
|
-
{
|
77
|
+
void re2_matchdata_mark(re2_matchdata* self) {
|
56
78
|
rb_gc_mark(self->regexp);
|
57
79
|
rb_gc_mark(self->text);
|
58
80
|
}
|
59
81
|
|
60
|
-
void re2_matchdata_free(re2_matchdata* self)
|
61
|
-
{
|
82
|
+
void re2_matchdata_free(re2_matchdata* self) {
|
62
83
|
if (self->matches) {
|
63
84
|
delete[] self->matches;
|
64
85
|
}
|
65
86
|
free(self);
|
66
87
|
}
|
67
88
|
|
68
|
-
void
|
69
|
-
|
70
|
-
|
89
|
+
void re2_consumer_mark(re2_consumer* self) {
|
90
|
+
rb_gc_mark(self->regexp);
|
91
|
+
rb_gc_mark(self->text);
|
92
|
+
}
|
93
|
+
|
94
|
+
void re2_consumer_free(re2_consumer* self) {
|
95
|
+
free(self);
|
96
|
+
}
|
97
|
+
|
98
|
+
void re2_regexp_free(re2_pattern* self) {
|
71
99
|
if (self->pattern) {
|
72
100
|
delete self->pattern;
|
73
101
|
}
|
74
102
|
free(self);
|
75
103
|
}
|
76
104
|
|
77
|
-
static VALUE
|
78
|
-
re2_matchdata_allocate(VALUE klass)
|
79
|
-
{
|
105
|
+
static VALUE re2_matchdata_allocate(VALUE klass) {
|
80
106
|
re2_matchdata *m;
|
81
|
-
return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
|
107
|
+
return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
|
108
|
+
re2_matchdata_free, m);
|
109
|
+
}
|
110
|
+
|
111
|
+
static VALUE re2_consumer_allocate(VALUE klass) {
|
112
|
+
re2_consumer *c;
|
113
|
+
return Data_Make_Struct(klass, re2_consumer, re2_consumer_mark,
|
114
|
+
re2_consumer_free, c);
|
82
115
|
}
|
83
116
|
|
84
117
|
/*
|
@@ -89,15 +122,95 @@ extern "C" {
|
|
89
122
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
90
123
|
* m.string #=> "bob 123"
|
91
124
|
*/
|
92
|
-
static VALUE
|
93
|
-
re2_matchdata_string(VALUE self)
|
94
|
-
{
|
125
|
+
static VALUE re2_matchdata_string(VALUE self) {
|
95
126
|
re2_matchdata *m;
|
96
127
|
Data_Get_Struct(self, re2_matchdata, m);
|
97
128
|
|
98
129
|
return m->text;
|
99
130
|
}
|
100
131
|
|
132
|
+
/*
|
133
|
+
* Returns the string passed into the consumer.
|
134
|
+
*
|
135
|
+
* @return [String] the original string.
|
136
|
+
* @example
|
137
|
+
* c = RE2::Regexp.new('(\d+)').consume("foo")
|
138
|
+
* c.string #=> "foo"
|
139
|
+
*/
|
140
|
+
static VALUE re2_consumer_string(VALUE self) {
|
141
|
+
re2_consumer *c;
|
142
|
+
Data_Get_Struct(self, re2_consumer, c);
|
143
|
+
|
144
|
+
return c->text;
|
145
|
+
}
|
146
|
+
|
147
|
+
/*
|
148
|
+
* Rewind the consumer to the start of the string.
|
149
|
+
*
|
150
|
+
* @example
|
151
|
+
* c = RE2::Regexp.new('(\d+)').consume("1 2 3")
|
152
|
+
* e = c.to_enum
|
153
|
+
* e.next #=> ["1"]
|
154
|
+
* e.next #=> ["2"]
|
155
|
+
* c.rewind
|
156
|
+
* e.next #=> ["1"]
|
157
|
+
*/
|
158
|
+
static VALUE re2_consumer_rewind(VALUE self) {
|
159
|
+
re2_consumer *c;
|
160
|
+
Data_Get_Struct(self, re2_consumer, c);
|
161
|
+
re2::StringPiece input(RSTRING_PTR(c->text));
|
162
|
+
|
163
|
+
c->input = input;
|
164
|
+
|
165
|
+
return self;
|
166
|
+
}
|
167
|
+
|
168
|
+
/*
|
169
|
+
* Scan the given text incrementally for matches, returning an array of
|
170
|
+
* matches on each subsequent call. Returns nil if no matches are found.
|
171
|
+
*
|
172
|
+
* @return [Array<String>, nil] the matches, or nil if no match is found.
|
173
|
+
* @example
|
174
|
+
* c = RE2::Regexp.new('(\w+)').consume("Foo bar baz")
|
175
|
+
* c.consume #=> ["Foo"]
|
176
|
+
* c.consume #=> ["bar"]
|
177
|
+
*/
|
178
|
+
static VALUE re2_consumer_consume(VALUE self) {
|
179
|
+
int i;
|
180
|
+
re2_pattern *p;
|
181
|
+
re2_consumer *c;
|
182
|
+
VALUE result;
|
183
|
+
|
184
|
+
Data_Get_Struct(self, re2_consumer, c);
|
185
|
+
Data_Get_Struct(c->regexp, re2_pattern, p);
|
186
|
+
|
187
|
+
vector<RE2::Arg> argv(c->argc);
|
188
|
+
vector<RE2::Arg*> args(c->argc);
|
189
|
+
vector<string> matches(c->argc);
|
190
|
+
|
191
|
+
for (i = 0; i < c->argc; i++) {
|
192
|
+
args[i] = &argv[i];
|
193
|
+
argv[i] = &matches[i];
|
194
|
+
}
|
195
|
+
|
196
|
+
if (RE2::FindAndConsumeN(&c->input, *p->pattern, &args[0], c->argc)) {
|
197
|
+
result = rb_ary_new2(c->argc);
|
198
|
+
for (i = 0; i < c->argc; i++) {
|
199
|
+
if (matches[i].empty()) {
|
200
|
+
rb_ary_push(result, Qnil);
|
201
|
+
} else {
|
202
|
+
rb_ary_push(result, ENCODED_STR_NEW(matches[i].data(),
|
203
|
+
matches[i].size(),
|
204
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"));
|
205
|
+
}
|
206
|
+
}
|
207
|
+
} else {
|
208
|
+
result = Qnil;
|
209
|
+
}
|
210
|
+
|
211
|
+
return result;
|
212
|
+
}
|
213
|
+
|
101
214
|
/*
|
102
215
|
* Returns the number of elements in the match array (including nils).
|
103
216
|
*
|
@@ -107,9 +220,7 @@ extern "C" {
|
|
107
220
|
* m.size #=> 2
|
108
221
|
* m.length #=> 2
|
109
222
|
*/
|
110
|
-
static VALUE
|
111
|
-
re2_matchdata_size(VALUE self)
|
112
|
-
{
|
223
|
+
static VALUE re2_matchdata_size(VALUE self) {
|
113
224
|
re2_matchdata *m;
|
114
225
|
Data_Get_Struct(self, re2_matchdata, m);
|
115
226
|
|
@@ -124,17 +235,28 @@ extern "C" {
|
|
124
235
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
125
236
|
* m.regexp #=> #<RE2::Regexp /(\d+)/>
|
126
237
|
*/
|
127
|
-
static VALUE
|
128
|
-
re2_matchdata_regexp(VALUE self)
|
129
|
-
{
|
238
|
+
static VALUE re2_matchdata_regexp(VALUE self) {
|
130
239
|
re2_matchdata *m;
|
131
240
|
Data_Get_Struct(self, re2_matchdata, m);
|
132
241
|
return m->regexp;
|
133
242
|
}
|
134
243
|
|
135
|
-
|
136
|
-
|
137
|
-
|
244
|
+
/*
|
245
|
+
* Returns the {RE2::Regexp} used in the consumer.
|
246
|
+
*
|
247
|
+
* @return [RE2::Regexp] the regexp used in the consumer
|
248
|
+
* @example
|
249
|
+
* c = RE2::Regexp.new('(\d+)').consume("bob 123")
|
250
|
+
* c.regexp #=> #<RE2::Regexp /(\d+)/>
|
251
|
+
*/
|
252
|
+
static VALUE re2_consumer_regexp(VALUE self) {
|
253
|
+
re2_consumer *c;
|
254
|
+
Data_Get_Struct(self, re2_consumer, c);
|
255
|
+
|
256
|
+
return c->regexp;
|
257
|
+
}
|
258
|
+
|
259
|
+
static VALUE re2_regexp_allocate(VALUE klass) {
|
138
260
|
re2_pattern *p;
|
139
261
|
return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
|
140
262
|
}
|
@@ -147,15 +269,15 @@ extern "C" {
|
|
147
269
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
148
270
|
* m.to_a #=> ["123", "123"]
|
149
271
|
*/
|
150
|
-
static VALUE
|
151
|
-
re2_matchdata_to_a(VALUE self)
|
152
|
-
{
|
272
|
+
static VALUE re2_matchdata_to_a(VALUE self) {
|
153
273
|
int i;
|
154
274
|
re2_matchdata *m;
|
275
|
+
re2_pattern *p;
|
155
276
|
re2::StringPiece match;
|
156
277
|
VALUE array;
|
157
278
|
|
158
279
|
Data_Get_Struct(self, re2_matchdata, m);
|
280
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
159
281
|
|
160
282
|
array = rb_ary_new2(m->number_of_matches);
|
161
283
|
for (i = 0; i < m->number_of_matches; i++) {
|
@@ -163,20 +285,21 @@ extern "C" {
|
|
163
285
|
rb_ary_push(array, Qnil);
|
164
286
|
} else {
|
165
287
|
match = m->matches[i];
|
166
|
-
rb_ary_push(array,
|
288
|
+
rb_ary_push(array, ENCODED_STR_NEW(match.data(), match.size(),
|
289
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"));
|
167
290
|
}
|
168
291
|
}
|
169
292
|
|
170
293
|
return array;
|
171
294
|
}
|
172
295
|
|
173
|
-
static VALUE
|
174
|
-
re2_matchdata_nth_match(int nth, VALUE self)
|
175
|
-
{
|
296
|
+
static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
|
176
297
|
re2_matchdata *m;
|
298
|
+
re2_pattern *p;
|
177
299
|
re2::StringPiece match;
|
178
300
|
|
179
301
|
Data_Get_Struct(self, re2_matchdata, m);
|
302
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
180
303
|
|
181
304
|
if (nth < 0 || nth >= m->number_of_matches) {
|
182
305
|
return Qnil;
|
@@ -186,14 +309,13 @@ extern "C" {
|
|
186
309
|
if (match.empty()) {
|
187
310
|
return Qnil;
|
188
311
|
} else {
|
189
|
-
return
|
312
|
+
return ENCODED_STR_NEW(match.data(), match.size(),
|
313
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
|
190
314
|
}
|
191
315
|
}
|
192
316
|
}
|
193
317
|
|
194
|
-
static VALUE
|
195
|
-
re2_matchdata_named_match(const char* name, VALUE self)
|
196
|
-
{
|
318
|
+
static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
|
197
319
|
int idx;
|
198
320
|
re2_matchdata *m;
|
199
321
|
re2_pattern *p;
|
@@ -256,9 +378,7 @@ extern "C" {
|
|
256
378
|
* m["number"] #=> "123"
|
257
379
|
* m[:number] #=> "123"
|
258
380
|
*/
|
259
|
-
static VALUE
|
260
|
-
re2_matchdata_aref(int argc, VALUE *argv, VALUE self)
|
261
|
-
{
|
381
|
+
static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
|
262
382
|
VALUE idx, rest;
|
263
383
|
rb_scan_args(argc, argv, "11", &idx, &rest);
|
264
384
|
|
@@ -278,9 +398,7 @@ extern "C" {
|
|
278
398
|
*
|
279
399
|
* @return [String] the entire matched string
|
280
400
|
*/
|
281
|
-
static VALUE
|
282
|
-
re2_matchdata_to_s(VALUE self)
|
283
|
-
{
|
401
|
+
static VALUE re2_matchdata_to_s(VALUE self) {
|
284
402
|
return re2_matchdata_nth_match(0, self);
|
285
403
|
}
|
286
404
|
|
@@ -292,17 +410,15 @@ extern "C" {
|
|
292
410
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
293
411
|
* m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
|
294
412
|
*/
|
295
|
-
static VALUE
|
296
|
-
re2_matchdata_inspect(VALUE self)
|
297
|
-
{
|
413
|
+
static VALUE re2_matchdata_inspect(VALUE self) {
|
298
414
|
int i;
|
299
415
|
re2_matchdata *m;
|
416
|
+
re2_pattern *p;
|
300
417
|
VALUE match, result;
|
301
418
|
ostringstream output;
|
302
419
|
|
303
420
|
Data_Get_Struct(self, re2_matchdata, m);
|
304
|
-
|
305
|
-
result = rb_str_new("#<RE2::MatchData", 16);
|
421
|
+
Data_Get_Struct(m->regexp, re2_pattern, p);
|
306
422
|
|
307
423
|
output << "#<RE2::MatchData";
|
308
424
|
|
@@ -324,7 +440,8 @@ extern "C" {
|
|
324
440
|
|
325
441
|
output << ">";
|
326
442
|
|
327
|
-
result =
|
443
|
+
result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
|
444
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
|
328
445
|
|
329
446
|
return result;
|
330
447
|
}
|
@@ -339,9 +456,7 @@ extern "C" {
|
|
339
456
|
* @see RE2::Regexp.new
|
340
457
|
*
|
341
458
|
*/
|
342
|
-
static VALUE
|
343
|
-
re2_re2(int argc, VALUE *argv, VALUE self)
|
344
|
-
{
|
459
|
+
static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
|
345
460
|
UNUSED(self);
|
346
461
|
return rb_class_new_instance(argc, argv, re2_cRegexp);
|
347
462
|
}
|
@@ -358,7 +473,8 @@ extern "C" {
|
|
358
473
|
*
|
359
474
|
* @param [String] pattern the pattern to compile
|
360
475
|
* @return [RE2::Regexp] an RE2::Regexp with the specified pattern
|
361
|
-
* @raise [NoMemoryError] if memory could not be allocated for the compiled
|
476
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled
|
477
|
+
* pattern
|
362
478
|
*
|
363
479
|
* @overload initialize(pattern, options)
|
364
480
|
* Returns a new {RE2::Regexp} object with a compiled version of
|
@@ -380,11 +496,9 @@ extern "C" {
|
|
380
496
|
* @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
|
381
497
|
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
382
498
|
*/
|
383
|
-
static VALUE
|
384
|
-
re2_regexp_initialize(int argc, VALUE *argv, VALUE self)
|
385
|
-
{
|
499
|
+
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
386
500
|
VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors,
|
387
|
-
max_mem, literal, never_nl, case_sensitive, perl_classes,
|
501
|
+
max_mem, literal, never_nl, case_sensitive, perl_classes,
|
388
502
|
word_boundary, one_line;
|
389
503
|
re2_pattern *p;
|
390
504
|
|
@@ -453,9 +567,9 @@ extern "C" {
|
|
453
567
|
re2_options.set_one_line(RTEST(one_line));
|
454
568
|
}
|
455
569
|
|
456
|
-
p->pattern = new
|
570
|
+
p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
|
457
571
|
} else {
|
458
|
-
p->pattern = new
|
572
|
+
p->pattern = new(nothrow) RE2(StringValuePtr(pattern));
|
459
573
|
}
|
460
574
|
|
461
575
|
if (p->pattern == 0) {
|
@@ -473,9 +587,7 @@ extern "C" {
|
|
473
587
|
* re2 = RE2::Regexp.new("woo?")
|
474
588
|
* re2.inspect #=> "#<RE2::Regexp /woo?/>"
|
475
589
|
*/
|
476
|
-
static VALUE
|
477
|
-
re2_regexp_inspect(VALUE self)
|
478
|
-
{
|
590
|
+
static VALUE re2_regexp_inspect(VALUE self) {
|
479
591
|
re2_pattern *p;
|
480
592
|
VALUE result;
|
481
593
|
ostringstream output;
|
@@ -484,7 +596,8 @@ extern "C" {
|
|
484
596
|
|
485
597
|
output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
|
486
598
|
|
487
|
-
result =
|
599
|
+
result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
|
600
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
|
488
601
|
|
489
602
|
return result;
|
490
603
|
}
|
@@ -497,12 +610,12 @@ extern "C" {
|
|
497
610
|
* re2 = RE2::Regexp.new("woo?")
|
498
611
|
* re2.to_s #=> "woo?"
|
499
612
|
*/
|
500
|
-
static VALUE
|
501
|
-
re2_regexp_to_s(VALUE self)
|
502
|
-
{
|
613
|
+
static VALUE re2_regexp_to_s(VALUE self) {
|
503
614
|
re2_pattern *p;
|
504
615
|
Data_Get_Struct(self, re2_pattern, p);
|
505
|
-
return
|
616
|
+
return ENCODED_STR_NEW(p->pattern->pattern().data(),
|
617
|
+
p->pattern->pattern().size(),
|
618
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
|
506
619
|
}
|
507
620
|
|
508
621
|
/*
|
@@ -514,9 +627,7 @@ extern "C" {
|
|
514
627
|
* re2 = RE2::Regexp.new("woo?")
|
515
628
|
* re2.ok? #=> true
|
516
629
|
*/
|
517
|
-
static VALUE
|
518
|
-
re2_regexp_ok(VALUE self)
|
519
|
-
{
|
630
|
+
static VALUE re2_regexp_ok(VALUE self) {
|
520
631
|
re2_pattern *p;
|
521
632
|
Data_Get_Struct(self, re2_pattern, p);
|
522
633
|
return BOOL2RUBY(p->pattern->ok());
|
@@ -531,9 +642,7 @@ extern "C" {
|
|
531
642
|
* re2 = RE2::Regexp.new("woo?", :utf8 => true)
|
532
643
|
* re2.utf8? #=> true
|
533
644
|
*/
|
534
|
-
static VALUE
|
535
|
-
re2_regexp_utf8(VALUE self)
|
536
|
-
{
|
645
|
+
static VALUE re2_regexp_utf8(VALUE self) {
|
537
646
|
re2_pattern *p;
|
538
647
|
Data_Get_Struct(self, re2_pattern, p);
|
539
648
|
return BOOL2RUBY(p->pattern->options().utf8());
|
@@ -548,9 +657,7 @@ extern "C" {
|
|
548
657
|
* re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
|
549
658
|
* re2.posix_syntax? #=> true
|
550
659
|
*/
|
551
|
-
static VALUE
|
552
|
-
re2_regexp_posix_syntax(VALUE self)
|
553
|
-
{
|
660
|
+
static VALUE re2_regexp_posix_syntax(VALUE self) {
|
554
661
|
re2_pattern *p;
|
555
662
|
Data_Get_Struct(self, re2_pattern, p);
|
556
663
|
return BOOL2RUBY(p->pattern->options().posix_syntax());
|
@@ -565,9 +672,7 @@ extern "C" {
|
|
565
672
|
* re2 = RE2::Regexp.new("woo?", :longest_match => true)
|
566
673
|
* re2.longest_match? #=> true
|
567
674
|
*/
|
568
|
-
static VALUE
|
569
|
-
re2_regexp_longest_match(VALUE self)
|
570
|
-
{
|
675
|
+
static VALUE re2_regexp_longest_match(VALUE self) {
|
571
676
|
re2_pattern *p;
|
572
677
|
Data_Get_Struct(self, re2_pattern, p);
|
573
678
|
return BOOL2RUBY(p->pattern->options().longest_match());
|
@@ -582,9 +687,7 @@ extern "C" {
|
|
582
687
|
* re2 = RE2::Regexp.new("woo?", :log_errors => true)
|
583
688
|
* re2.log_errors? #=> true
|
584
689
|
*/
|
585
|
-
static VALUE
|
586
|
-
re2_regexp_log_errors(VALUE self)
|
587
|
-
{
|
690
|
+
static VALUE re2_regexp_log_errors(VALUE self) {
|
588
691
|
re2_pattern *p;
|
589
692
|
Data_Get_Struct(self, re2_pattern, p);
|
590
693
|
return BOOL2RUBY(p->pattern->options().log_errors());
|
@@ -599,9 +702,7 @@ extern "C" {
|
|
599
702
|
* re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
|
600
703
|
* re2.max_mem #=> 1024
|
601
704
|
*/
|
602
|
-
static VALUE
|
603
|
-
re2_regexp_max_mem(VALUE self)
|
604
|
-
{
|
705
|
+
static VALUE re2_regexp_max_mem(VALUE self) {
|
605
706
|
re2_pattern *p;
|
606
707
|
Data_Get_Struct(self, re2_pattern, p);
|
607
708
|
return INT2FIX(p->pattern->options().max_mem());
|
@@ -616,9 +717,7 @@ extern "C" {
|
|
616
717
|
* re2 = RE2::Regexp.new("woo?", :literal => true)
|
617
718
|
* re2.literal? #=> true
|
618
719
|
*/
|
619
|
-
static VALUE
|
620
|
-
re2_regexp_literal(VALUE self)
|
621
|
-
{
|
720
|
+
static VALUE re2_regexp_literal(VALUE self) {
|
622
721
|
re2_pattern *p;
|
623
722
|
Data_Get_Struct(self, re2_pattern, p);
|
624
723
|
return BOOL2RUBY(p->pattern->options().literal());
|
@@ -633,9 +732,7 @@ extern "C" {
|
|
633
732
|
* re2 = RE2::Regexp.new("woo?", :never_nl => true)
|
634
733
|
* re2.never_nl? #=> true
|
635
734
|
*/
|
636
|
-
static VALUE
|
637
|
-
re2_regexp_never_nl(VALUE self)
|
638
|
-
{
|
735
|
+
static VALUE re2_regexp_never_nl(VALUE self) {
|
639
736
|
re2_pattern *p;
|
640
737
|
Data_Get_Struct(self, re2_pattern, p);
|
641
738
|
return BOOL2RUBY(p->pattern->options().never_nl());
|
@@ -650,9 +747,7 @@ extern "C" {
|
|
650
747
|
* re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
|
651
748
|
* re2.case_sensitive? #=> true
|
652
749
|
*/
|
653
|
-
static VALUE
|
654
|
-
re2_regexp_case_sensitive(VALUE self)
|
655
|
-
{
|
750
|
+
static VALUE re2_regexp_case_sensitive(VALUE self) {
|
656
751
|
re2_pattern *p;
|
657
752
|
Data_Get_Struct(self, re2_pattern, p);
|
658
753
|
return BOOL2RUBY(p->pattern->options().case_sensitive());
|
@@ -668,9 +763,7 @@ extern "C" {
|
|
668
763
|
* re2.case_insensitive? #=> false
|
669
764
|
* re2.casefold? #=> false
|
670
765
|
*/
|
671
|
-
static VALUE
|
672
|
-
re2_regexp_case_insensitive(VALUE self)
|
673
|
-
{
|
766
|
+
static VALUE re2_regexp_case_insensitive(VALUE self) {
|
674
767
|
return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
|
675
768
|
}
|
676
769
|
|
@@ -683,9 +776,7 @@ extern "C" {
|
|
683
776
|
* re2 = RE2::Regexp.new("woo?", :perl_classes => true)
|
684
777
|
* re2.perl_classes? #=> true
|
685
778
|
*/
|
686
|
-
static VALUE
|
687
|
-
re2_regexp_perl_classes(VALUE self)
|
688
|
-
{
|
779
|
+
static VALUE re2_regexp_perl_classes(VALUE self) {
|
689
780
|
re2_pattern *p;
|
690
781
|
Data_Get_Struct(self, re2_pattern, p);
|
691
782
|
return BOOL2RUBY(p->pattern->options().perl_classes());
|
@@ -700,9 +791,7 @@ extern "C" {
|
|
700
791
|
* re2 = RE2::Regexp.new("woo?", :word_boundary => true)
|
701
792
|
* re2.word_boundary? #=> true
|
702
793
|
*/
|
703
|
-
static VALUE
|
704
|
-
re2_regexp_word_boundary(VALUE self)
|
705
|
-
{
|
794
|
+
static VALUE re2_regexp_word_boundary(VALUE self) {
|
706
795
|
re2_pattern *p;
|
707
796
|
Data_Get_Struct(self, re2_pattern, p);
|
708
797
|
return BOOL2RUBY(p->pattern->options().word_boundary());
|
@@ -717,9 +806,7 @@ extern "C" {
|
|
717
806
|
* re2 = RE2::Regexp.new("woo?", :one_line => true)
|
718
807
|
* re2.one_line? #=> true
|
719
808
|
*/
|
720
|
-
static VALUE
|
721
|
-
re2_regexp_one_line(VALUE self)
|
722
|
-
{
|
809
|
+
static VALUE re2_regexp_one_line(VALUE self) {
|
723
810
|
re2_pattern *p;
|
724
811
|
Data_Get_Struct(self, re2_pattern, p);
|
725
812
|
return BOOL2RUBY(p->pattern->options().one_line());
|
@@ -731,9 +818,7 @@ extern "C" {
|
|
731
818
|
*
|
732
819
|
* @return [String, nil] the error string or nil
|
733
820
|
*/
|
734
|
-
static VALUE
|
735
|
-
re2_regexp_error(VALUE self)
|
736
|
-
{
|
821
|
+
static VALUE re2_regexp_error(VALUE self) {
|
737
822
|
re2_pattern *p;
|
738
823
|
Data_Get_Struct(self, re2_pattern, p);
|
739
824
|
if (p->pattern->ok()) {
|
@@ -749,15 +834,15 @@ extern "C" {
|
|
749
834
|
*
|
750
835
|
* @return [String, nil] the offending portion of the regexp or nil
|
751
836
|
*/
|
752
|
-
static VALUE
|
753
|
-
re2_regexp_error_arg(VALUE self)
|
754
|
-
{
|
837
|
+
static VALUE re2_regexp_error_arg(VALUE self) {
|
755
838
|
re2_pattern *p;
|
756
839
|
Data_Get_Struct(self, re2_pattern, p);
|
757
840
|
if (p->pattern->ok()) {
|
758
841
|
return Qnil;
|
759
842
|
} else {
|
760
|
-
return
|
843
|
+
return ENCODED_STR_NEW(p->pattern->error_arg().data(),
|
844
|
+
p->pattern->error_arg().size(),
|
845
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
|
761
846
|
}
|
762
847
|
}
|
763
848
|
|
@@ -768,9 +853,7 @@ extern "C" {
|
|
768
853
|
*
|
769
854
|
* @return [Fixnum] the regexp "cost"
|
770
855
|
*/
|
771
|
-
static VALUE
|
772
|
-
re2_regexp_program_size(VALUE self)
|
773
|
-
{
|
856
|
+
static VALUE re2_regexp_program_size(VALUE self) {
|
774
857
|
re2_pattern *p;
|
775
858
|
Data_Get_Struct(self, re2_pattern, p);
|
776
859
|
return INT2FIX(p->pattern->ProgramSize());
|
@@ -782,9 +865,7 @@ extern "C" {
|
|
782
865
|
*
|
783
866
|
* @return [Hash] the options
|
784
867
|
*/
|
785
|
-
static VALUE
|
786
|
-
re2_regexp_options(VALUE self)
|
787
|
-
{
|
868
|
+
static VALUE re2_regexp_options(VALUE self) {
|
788
869
|
VALUE options;
|
789
870
|
re2_pattern *p;
|
790
871
|
|
@@ -837,9 +918,7 @@ extern "C" {
|
|
837
918
|
*
|
838
919
|
* @return [Fixnum] the number of capturing subpatterns
|
839
920
|
*/
|
840
|
-
static VALUE
|
841
|
-
re2_regexp_number_of_capturing_groups(VALUE self)
|
842
|
-
{
|
921
|
+
static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
|
843
922
|
re2_pattern *p;
|
844
923
|
|
845
924
|
Data_Get_Struct(self, re2_pattern, p);
|
@@ -851,9 +930,7 @@ extern "C" {
|
|
851
930
|
*
|
852
931
|
* @return [Hash] a hash of names to capturing indices
|
853
932
|
*/
|
854
|
-
static VALUE
|
855
|
-
re2_regexp_named_capturing_groups(VALUE self)
|
856
|
-
{
|
933
|
+
static VALUE re2_regexp_named_capturing_groups(VALUE self) {
|
857
934
|
VALUE capturing_groups;
|
858
935
|
re2_pattern *p;
|
859
936
|
map<string, int> groups;
|
@@ -865,7 +942,8 @@ extern "C" {
|
|
865
942
|
|
866
943
|
for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
|
867
944
|
rb_hash_aset(capturing_groups,
|
868
|
-
|
945
|
+
ENCODED_STR_NEW(iterator->first.data(), iterator->first.size(),
|
946
|
+
p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"),
|
869
947
|
INT2FIX(iterator->second));
|
870
948
|
}
|
871
949
|
|
@@ -916,9 +994,7 @@ extern "C" {
|
|
916
994
|
* r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
|
917
995
|
* r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
|
918
996
|
*/
|
919
|
-
static VALUE
|
920
|
-
re2_regexp_match(int argc, VALUE *argv, VALUE self)
|
921
|
-
{
|
997
|
+
static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
922
998
|
int n;
|
923
999
|
bool matched;
|
924
1000
|
re2_pattern *p;
|
@@ -936,7 +1012,8 @@ extern "C" {
|
|
936
1012
|
}
|
937
1013
|
|
938
1014
|
if (n == 0) {
|
939
|
-
matched = match(p->pattern, StringValuePtr(text), 0,
|
1015
|
+
matched = match(p->pattern, StringValuePtr(text), 0,
|
1016
|
+
static_cast<int>(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0);
|
940
1017
|
return BOOL2RUBY(matched);
|
941
1018
|
} else {
|
942
1019
|
|
@@ -945,18 +1022,21 @@ extern "C" {
|
|
945
1022
|
|
946
1023
|
matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
|
947
1024
|
Data_Get_Struct(matchdata, re2_matchdata, m);
|
948
|
-
m->matches = new
|
1025
|
+
m->matches = new(nothrow) re2::StringPiece[n];
|
949
1026
|
m->regexp = self;
|
950
1027
|
m->text = rb_str_dup(text);
|
951
1028
|
rb_str_freeze(m->text);
|
952
1029
|
|
953
1030
|
if (m->matches == 0) {
|
954
|
-
rb_raise(rb_eNoMemError,
|
1031
|
+
rb_raise(rb_eNoMemError,
|
1032
|
+
"not enough memory to allocate StringPieces for matches");
|
955
1033
|
}
|
956
1034
|
|
957
1035
|
m->number_of_matches = n;
|
958
1036
|
|
959
|
-
matched = match(p->pattern, StringValuePtr(text), 0,
|
1037
|
+
matched = match(p->pattern, StringValuePtr(text), 0,
|
1038
|
+
static_cast<int>(RSTRING_LEN(text)),
|
1039
|
+
RE2::UNANCHORED, m->matches, n);
|
960
1040
|
|
961
1041
|
if (matched) {
|
962
1042
|
return matchdata;
|
@@ -972,9 +1052,7 @@ extern "C" {
|
|
972
1052
|
*
|
973
1053
|
* @return [Boolean] whether the match was successful
|
974
1054
|
*/
|
975
|
-
static VALUE
|
976
|
-
re2_regexp_match_query(VALUE self, VALUE text)
|
977
|
-
{
|
1055
|
+
static VALUE re2_regexp_match_query(VALUE self, VALUE text) {
|
978
1056
|
VALUE argv[2];
|
979
1057
|
argv[0] = text;
|
980
1058
|
argv[1] = INT2FIX(0);
|
@@ -983,7 +1061,30 @@ extern "C" {
|
|
983
1061
|
}
|
984
1062
|
|
985
1063
|
/*
|
986
|
-
*
|
1064
|
+
* Returns a {RE2::Consumer} for scanning the given text incrementally.
|
1065
|
+
*
|
1066
|
+
* @example
|
1067
|
+
* c = RE2::Regexp.new('(\w+)').consume("Foo bar baz")
|
1068
|
+
*/
|
1069
|
+
static VALUE re2_regexp_consume(VALUE self, VALUE text) {
  /*
   * Builds a new RE2::Consumer bound to this regexp and to +text+.
   *
   * self: the RE2::Regexp receiver.
   * text: the String to scan; anything else is converted via StringValue,
   *       which raises TypeError when no String conversion exists.
   * Returns the freshly allocated consumer object.
   */
  re2_pattern *p;
  re2_consumer *c;
  VALUE consumer;

  /* Guarantee a real String before touching RSTRING_PTR/RSTRING_LEN. */
  StringValue(text);

  Data_Get_Struct(self, re2_pattern, p);
  consumer = rb_class_new_instance(0, 0, re2_cConsumer);
  Data_Get_Struct(consumer, re2_consumer, c);

  c->regexp = self;
  c->text = text;

  /*
   * Pass the byte length explicitly (as re2_regexp_match does) instead of
   * letting StringPiece call strlen, so text containing NUL bytes is not
   * truncated at the first NUL.
   * NOTE(review): the StringPiece points into text's buffer, so this
   * presumably relies on c->text keeping that String alive and unmodified;
   * confirm against the consumer's mark function.
   */
  c->input = re2::StringPiece(RSTRING_PTR(text),
      static_cast<int>(RSTRING_LEN(text)));
  c->argc = p->pattern->NumberOfCapturingGroups();

  return consumer;
}
|
1085
|
+
|
1086
|
+
/*
|
1087
|
+
* Replaces the first occurrence of +pattern+ in +str+ with
|
987
1088
|
* +rewrite+ <i>in place</i>.
|
988
1089
|
*
|
989
1090
|
* @param [String] str the string to modify
|
@@ -996,11 +1097,10 @@ extern "C" {
|
|
996
1097
|
* RE2.Replace("hello there", re2, "yo") #=> "yo there"
|
997
1098
|
* text = "Good morning"
|
998
1099
|
* RE2.Replace(text, "morn", "even") #=> "Good evening"
|
999
|
-
* text
|
1100
|
+
* text #=> "Good evening"
|
1000
1101
|
*/
|
1001
|
-
static VALUE
|
1002
|
-
|
1003
|
-
{
|
1102
|
+
static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
|
1103
|
+
VALUE rewrite) {
|
1004
1104
|
|
1005
1105
|
/* Look out for frozen strings. */
|
1006
1106
|
rb_check_frozen(str);
|
@@ -1017,7 +1117,8 @@ extern "C" {
|
|
1017
1117
|
Data_Get_Struct(pattern, re2_pattern, p);
|
1018
1118
|
RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
|
1019
1119
|
} else {
|
1020
|
-
RE2::Replace(&str_as_string, StringValuePtr(pattern),
|
1120
|
+
RE2::Replace(&str_as_string, StringValuePtr(pattern),
|
1121
|
+
StringValuePtr(rewrite));
|
1021
1122
|
}
|
1022
1123
|
|
1023
1124
|
/* Save the replacement as a VALUE. */
|
@@ -1033,7 +1134,7 @@ extern "C" {
|
|
1033
1134
|
}
|
1034
1135
|
|
1035
1136
|
/*
|
1036
|
-
* Replaces every occurrence of +pattern+ in +str+ with
|
1137
|
+
* Replaces every occurrence of +pattern+ in +str+ with
|
1037
1138
|
* +rewrite+ <i>in place</i>.
|
1038
1139
|
*
|
1039
1140
|
* @param [String] str the string to modify
|
@@ -1048,9 +1149,8 @@ extern "C" {
|
|
1048
1149
|
* RE2.GlobalReplace(text, "o", "ee") #=> "Geeeed meerning"
|
1049
1150
|
* text #=> "Geeeed meerning"
|
1050
1151
|
*/
|
1051
|
-
static VALUE
|
1052
|
-
|
1053
|
-
{
|
1152
|
+
static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
|
1153
|
+
VALUE rewrite) {
|
1054
1154
|
|
1055
1155
|
/* Look out for frozen strings. */
|
1056
1156
|
rb_check_frozen(str);
|
@@ -1067,7 +1167,8 @@ extern "C" {
|
|
1067
1167
|
Data_Get_Struct(pattern, re2_pattern, p);
|
1068
1168
|
RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
|
1069
1169
|
} else {
|
1070
|
-
RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
|
1170
|
+
RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
|
1171
|
+
StringValuePtr(rewrite));
|
1071
1172
|
}
|
1072
1173
|
|
1073
1174
|
/* Save the replacement as a VALUE. */
|
@@ -1092,70 +1193,121 @@ extern "C" {
|
|
1092
1193
|
* @example
|
1093
1194
|
* RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
|
1094
1195
|
*/
|
1095
|
-
static VALUE
|
1096
|
-
re2_QuoteMeta(VALUE self, VALUE unquoted)
|
1097
|
-
{
|
1196
|
+
static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
  /*
   * Escapes every regexp metacharacter in +unquoted+ via RE2::QuoteMeta and
   * returns the result as a new Ruby String.
   *
   * self:     unused receiver (module or class the function is bound to).
   * unquoted: the String to escape; converted via StringValue, which raises
   *           TypeError when no String conversion exists.
   */
  UNUSED(self);

  StringValue(unquoted);

  /*
   * Hand RE2 an explicit byte length: a bare char* would be measured with
   * strlen and drop everything after an embedded NUL byte.
   */
  string quoted_string = RE2::QuoteMeta(
      re2::StringPiece(RSTRING_PTR(unquoted),
          static_cast<int>(RSTRING_LEN(unquoted))));

  return rb_str_new(quoted_string.data(), quoted_string.size());
}
|
1102
1201
|
|
1103
|
-
void
|
1104
|
-
Init_re2()
|
1105
|
-
{
|
1202
|
+
void Init_re2(void) {
|
1106
1203
|
re2_mRE2 = rb_define_module("RE2");
|
1107
1204
|
re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
|
1108
1205
|
re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
|
1206
|
+
re2_cConsumer = rb_define_class_under(re2_mRE2, "Consumer", rb_cObject);
|
1109
1207
|
|
1110
1208
|
rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
|
1111
|
-
rb_define_alloc_func(re2_cMatchData,
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1115
|
-
|
1116
|
-
rb_define_method(re2_cMatchData, "
|
1117
|
-
|
1118
|
-
rb_define_method(re2_cMatchData, "
|
1119
|
-
|
1120
|
-
rb_define_method(re2_cMatchData, "
|
1121
|
-
|
1122
|
-
rb_define_method(
|
1209
|
+
rb_define_alloc_func(re2_cMatchData,
|
1210
|
+
(VALUE (*)(VALUE))re2_matchdata_allocate);
|
1211
|
+
rb_define_alloc_func(re2_cConsumer,
|
1212
|
+
(VALUE (*)(VALUE))re2_consumer_allocate);
|
1213
|
+
|
1214
|
+
rb_define_method(re2_cMatchData, "string",
|
1215
|
+
RUBY_METHOD_FUNC(re2_matchdata_string), 0);
|
1216
|
+
rb_define_method(re2_cMatchData, "regexp",
|
1217
|
+
RUBY_METHOD_FUNC(re2_matchdata_regexp), 0);
|
1218
|
+
rb_define_method(re2_cMatchData, "to_a",
|
1219
|
+
RUBY_METHOD_FUNC(re2_matchdata_to_a), 0);
|
1220
|
+
rb_define_method(re2_cMatchData, "size",
|
1221
|
+
RUBY_METHOD_FUNC(re2_matchdata_size), 0);
|
1222
|
+
rb_define_method(re2_cMatchData, "length",
|
1223
|
+
RUBY_METHOD_FUNC(re2_matchdata_size), 0);
|
1224
|
+
rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
|
1225
|
+
-1); rb_define_method(re2_cMatchData, "to_s",
|
1226
|
+
RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
|
1227
|
+
rb_define_method(re2_cMatchData, "inspect",
|
1228
|
+
RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
|
1229
|
+
|
1230
|
+
rb_define_method(re2_cConsumer, "string",
|
1231
|
+
RUBY_METHOD_FUNC(re2_consumer_string), 0);
|
1232
|
+
rb_define_method(re2_cConsumer, "regexp",
|
1233
|
+
RUBY_METHOD_FUNC(re2_consumer_regexp), 0);
|
1234
|
+
rb_define_method(re2_cConsumer, "consume",
|
1235
|
+
RUBY_METHOD_FUNC(re2_consumer_consume), 0);
|
1236
|
+
rb_define_method(re2_cConsumer, "rewind",
|
1237
|
+
RUBY_METHOD_FUNC(re2_consumer_rewind), 0);
|
1238
|
+
|
1239
|
+
rb_define_method(re2_cRegexp, "initialize",
|
1240
|
+
RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
|
1123
1241
|
rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
|
1124
|
-
rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
|
1125
|
-
|
1126
|
-
rb_define_method(re2_cRegexp, "
|
1127
|
-
|
1128
|
-
rb_define_method(re2_cRegexp, "
|
1129
|
-
|
1130
|
-
rb_define_method(re2_cRegexp, "
|
1131
|
-
|
1132
|
-
rb_define_method(re2_cRegexp, "
|
1133
|
-
|
1242
|
+
rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
|
1243
|
+
0);
|
1244
|
+
rb_define_method(re2_cRegexp, "error_arg",
|
1245
|
+
RUBY_METHOD_FUNC(re2_regexp_error_arg), 0);
|
1246
|
+
rb_define_method(re2_cRegexp, "program_size",
|
1247
|
+
RUBY_METHOD_FUNC(re2_regexp_program_size), 0);
|
1248
|
+
rb_define_method(re2_cRegexp, "options",
|
1249
|
+
RUBY_METHOD_FUNC(re2_regexp_options), 0);
|
1250
|
+
rb_define_method(re2_cRegexp, "number_of_capturing_groups",
|
1251
|
+
RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
|
1252
|
+
rb_define_method(re2_cRegexp, "named_capturing_groups",
|
1253
|
+
RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
|
1254
|
+
rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
|
1255
|
+
-1);
|
1256
|
+
rb_define_method(re2_cRegexp, "match?",
|
1257
|
+
RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
|
1258
|
+
rb_define_method(re2_cRegexp, "=~",
|
1259
|
+
RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
|
1260
|
+
rb_define_method(re2_cRegexp, "===",
|
1261
|
+
RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
|
1262
|
+
rb_define_method(re2_cRegexp, "consume", RUBY_METHOD_FUNC(re2_regexp_consume), 1);
|
1134
1263
|
rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
|
1135
|
-
rb_define_method(re2_cRegexp, "to_str", RUBY_METHOD_FUNC(re2_regexp_to_s),
|
1136
|
-
|
1137
|
-
rb_define_method(re2_cRegexp, "
|
1138
|
-
|
1139
|
-
rb_define_method(re2_cRegexp, "
|
1140
|
-
|
1141
|
-
rb_define_method(re2_cRegexp, "
|
1142
|
-
|
1143
|
-
rb_define_method(re2_cRegexp, "
|
1144
|
-
|
1145
|
-
rb_define_method(re2_cRegexp, "
|
1146
|
-
|
1147
|
-
rb_define_method(re2_cRegexp, "
|
1148
|
-
|
1149
|
-
rb_define_method(re2_cRegexp, "
|
1150
|
-
|
1151
|
-
rb_define_method(re2_cRegexp, "
|
1152
|
-
|
1153
|
-
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1264
|
+
rb_define_method(re2_cRegexp, "to_str", RUBY_METHOD_FUNC(re2_regexp_to_s),
|
1265
|
+
0);
|
1266
|
+
rb_define_method(re2_cRegexp, "pattern", RUBY_METHOD_FUNC(re2_regexp_to_s),
|
1267
|
+
0);
|
1268
|
+
rb_define_method(re2_cRegexp, "source", RUBY_METHOD_FUNC(re2_regexp_to_s),
|
1269
|
+
0);
|
1270
|
+
rb_define_method(re2_cRegexp, "inspect",
|
1271
|
+
RUBY_METHOD_FUNC(re2_regexp_inspect), 0);
|
1272
|
+
rb_define_method(re2_cRegexp, "utf8?", RUBY_METHOD_FUNC(re2_regexp_utf8),
|
1273
|
+
0);
|
1274
|
+
rb_define_method(re2_cRegexp, "posix_syntax?",
|
1275
|
+
RUBY_METHOD_FUNC(re2_regexp_posix_syntax), 0);
|
1276
|
+
rb_define_method(re2_cRegexp, "longest_match?",
|
1277
|
+
RUBY_METHOD_FUNC(re2_regexp_longest_match), 0);
|
1278
|
+
rb_define_method(re2_cRegexp, "log_errors?",
|
1279
|
+
RUBY_METHOD_FUNC(re2_regexp_log_errors), 0);
|
1280
|
+
rb_define_method(re2_cRegexp, "max_mem",
|
1281
|
+
RUBY_METHOD_FUNC(re2_regexp_max_mem), 0);
|
1282
|
+
rb_define_method(re2_cRegexp, "literal?",
|
1283
|
+
RUBY_METHOD_FUNC(re2_regexp_literal), 0);
|
1284
|
+
rb_define_method(re2_cRegexp, "never_nl?",
|
1285
|
+
RUBY_METHOD_FUNC(re2_regexp_never_nl), 0);
|
1286
|
+
rb_define_method(re2_cRegexp, "case_sensitive?",
|
1287
|
+
RUBY_METHOD_FUNC(re2_regexp_case_sensitive), 0);
|
1288
|
+
rb_define_method(re2_cRegexp, "case_insensitive?",
|
1289
|
+
RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
|
1290
|
+
rb_define_method(re2_cRegexp, "casefold?",
|
1291
|
+
RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
|
1292
|
+
rb_define_method(re2_cRegexp, "perl_classes?",
|
1293
|
+
RUBY_METHOD_FUNC(re2_regexp_perl_classes), 0);
|
1294
|
+
rb_define_method(re2_cRegexp, "word_boundary?",
|
1295
|
+
RUBY_METHOD_FUNC(re2_regexp_word_boundary), 0);
|
1296
|
+
rb_define_method(re2_cRegexp, "one_line?",
|
1297
|
+
RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
|
1298
|
+
|
1299
|
+
rb_define_module_function(re2_mRE2, "Replace",
|
1300
|
+
RUBY_METHOD_FUNC(re2_Replace), 3);
|
1301
|
+
rb_define_module_function(re2_mRE2, "GlobalReplace",
|
1302
|
+
RUBY_METHOD_FUNC(re2_GlobalReplace), 3);
|
1303
|
+
rb_define_module_function(re2_mRE2, "QuoteMeta",
|
1304
|
+
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
1305
|
+
rb_define_singleton_method(re2_cRegexp, "escape",
|
1306
|
+
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
1307
|
+
rb_define_singleton_method(re2_cRegexp, "quote",
|
1308
|
+
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
1309
|
+
rb_define_singleton_method(re2_cRegexp, "compile",
|
1310
|
+
RUBY_METHOD_FUNC(rb_class_new_instance), -1);
|
1159
1311
|
|
1160
1312
|
rb_define_global_function("RE2", RUBY_METHOD_FUNC(re2_re2), -1);
|
1161
1313
|
|