re2 2.0.0-x86_64-linux → 2.1.1-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.txt +237 -0
- data/LICENSE.txt +1 -1
- data/README.md +47 -16
- data/ext/re2/extconf.rb +0 -1
- data/ext/re2/re2.cc +306 -317
- data/lib/2.6/re2.so +0 -0
- data/lib/2.7/re2.so +0 -0
- data/lib/3.0/re2.so +0 -0
- data/lib/3.1/re2.so +0 -0
- data/lib/3.2/re2.so +0 -0
- data/lib/re2/version.rb +1 -1
- data/re2.gemspec +5 -4
- data/spec/kernel_spec.rb +3 -3
- data/spec/re2/match_data_spec.rb +24 -0
- data/spec/re2/regexp_spec.rb +6 -0
- data/spec/re2/scanner_spec.rb +76 -22
- data/spec/re2/set_spec.rb +41 -1
- data/spec/re2/string_spec.rb +7 -3
- data/spec/re2_spec.rb +104 -10
- data/spec/spec_helper.rb +10 -0
- metadata +6 -4
data/ext/re2/re2.cc
CHANGED
@@ -6,77 +6,21 @@
|
|
6
6
|
* Released under the BSD Licence, please see LICENSE.txt
|
7
7
|
*/
|
8
8
|
|
9
|
-
#include <ruby.h>
|
10
|
-
#include <re2/re2.h>
|
11
|
-
#include <re2/set.h>
|
12
9
|
#include <stdint.h>
|
13
|
-
|
10
|
+
|
11
|
+
#include <map>
|
14
12
|
#include <sstream>
|
13
|
+
#include <string>
|
15
14
|
#include <vector>
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
15
|
+
|
16
|
+
#include <re2/re2.h>
|
17
|
+
#include <re2/set.h>
|
18
|
+
#include <ruby.h>
|
19
|
+
#include <ruby/encoding.h>
|
21
20
|
|
22
21
|
#define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
|
23
22
|
#define UNUSED(x) ((void)x)
|
24
23
|
|
25
|
-
#ifndef RSTRING_LEN
|
26
|
-
#define RSTRING_LEN(x) (RSTRING(x)->len)
|
27
|
-
#endif
|
28
|
-
|
29
|
-
#ifndef RSTRING_PTR
|
30
|
-
#define RSTRING_PTR(x) (RSTRING(x)->ptr)
|
31
|
-
#endif
|
32
|
-
|
33
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
34
|
-
#include <ruby/encoding.h>
|
35
|
-
#define ENCODED_STR_NEW(str, length, encoding) \
|
36
|
-
({ \
|
37
|
-
VALUE _string = rb_str_new(str, length); \
|
38
|
-
int _enc = rb_enc_find_index(encoding); \
|
39
|
-
rb_enc_associate_index(_string, _enc); \
|
40
|
-
_string; \
|
41
|
-
})
|
42
|
-
#define ENCODED_STR_NEW2(str, length, str2) \
|
43
|
-
({ \
|
44
|
-
VALUE _string = rb_str_new(str, length); \
|
45
|
-
int _enc = rb_enc_get_index(str2); \
|
46
|
-
rb_enc_associate_index(_string, _enc); \
|
47
|
-
_string; \
|
48
|
-
})
|
49
|
-
#else
|
50
|
-
#define ENCODED_STR_NEW(str, length, encoding) \
|
51
|
-
rb_str_new((const char *)str, (long)length)
|
52
|
-
#define ENCODED_STR_NEW2(str, length, str2) \
|
53
|
-
rb_str_new((const char *)str, (long)length)
|
54
|
-
#endif
|
55
|
-
|
56
|
-
#ifdef HAVE_RB_STR_SUBLEN
|
57
|
-
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
58
|
-
LONG2NUM(rb_str_sublen(str, offset))
|
59
|
-
#else
|
60
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
61
|
-
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
62
|
-
({ \
|
63
|
-
VALUE _string = ENCODED_STR_NEW(RSTRING_PTR(str), offset, encoding); \
|
64
|
-
rb_str_length(_string); \
|
65
|
-
})
|
66
|
-
#else
|
67
|
-
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
68
|
-
LONG2NUM(offset)
|
69
|
-
#endif
|
70
|
-
#endif
|
71
|
-
|
72
|
-
#ifdef HAVE_ENDPOS_ARGUMENT
|
73
|
-
#define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
|
74
|
-
(pattern->Match(text, startpos, endpos, anchor, match, nmatch))
|
75
|
-
#else
|
76
|
-
#define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
|
77
|
-
(pattern->Match(text, startpos, anchor, match, nmatch))
|
78
|
-
#endif
|
79
|
-
|
80
24
|
typedef struct {
|
81
25
|
RE2 *pattern;
|
82
26
|
} re2_pattern;
|
@@ -107,95 +51,103 @@ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
|
107
51
|
id_perl_classes, id_word_boundary, id_one_line,
|
108
52
|
id_unanchored, id_anchor_start, id_anchor_both, id_exception;
|
109
53
|
|
110
|
-
|
54
|
+
inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
|
55
|
+
if (encoding == RE2::Options::EncodingUTF8) {
|
56
|
+
return rb_utf8_str_new(str, length);
|
57
|
+
}
|
58
|
+
|
59
|
+
VALUE string = rb_str_new(str, length);
|
60
|
+
rb_enc_associate_index(string, rb_enc_find_index("ISO-8859-1"));
|
61
|
+
|
62
|
+
return string;
|
63
|
+
}
|
64
|
+
|
65
|
+
static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
|
111
66
|
if (TYPE(options) != T_HASH) {
|
112
67
|
rb_raise(rb_eArgError, "options should be a hash");
|
113
68
|
}
|
114
|
-
VALUE utf8, posix_syntax, longest_match, log_errors,
|
115
|
-
max_mem, literal, never_nl, case_sensitive, perl_classes,
|
116
|
-
word_boundary, one_line;
|
117
69
|
|
118
|
-
utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
|
70
|
+
VALUE utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
|
119
71
|
if (!NIL_P(utf8)) {
|
120
|
-
re2_options
|
72
|
+
re2_options->set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
|
121
73
|
}
|
122
74
|
|
123
|
-
posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
|
75
|
+
VALUE posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
|
124
76
|
if (!NIL_P(posix_syntax)) {
|
125
|
-
re2_options
|
77
|
+
re2_options->set_posix_syntax(RTEST(posix_syntax));
|
126
78
|
}
|
127
79
|
|
128
|
-
longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
|
80
|
+
VALUE longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
|
129
81
|
if (!NIL_P(longest_match)) {
|
130
|
-
re2_options
|
82
|
+
re2_options->set_longest_match(RTEST(longest_match));
|
131
83
|
}
|
132
84
|
|
133
|
-
log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
|
85
|
+
VALUE log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
|
134
86
|
if (!NIL_P(log_errors)) {
|
135
|
-
re2_options
|
87
|
+
re2_options->set_log_errors(RTEST(log_errors));
|
136
88
|
}
|
137
89
|
|
138
|
-
max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
|
90
|
+
VALUE max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
|
139
91
|
if (!NIL_P(max_mem)) {
|
140
|
-
re2_options
|
92
|
+
re2_options->set_max_mem(NUM2INT(max_mem));
|
141
93
|
}
|
142
94
|
|
143
|
-
literal = rb_hash_aref(options, ID2SYM(id_literal));
|
95
|
+
VALUE literal = rb_hash_aref(options, ID2SYM(id_literal));
|
144
96
|
if (!NIL_P(literal)) {
|
145
|
-
re2_options
|
97
|
+
re2_options->set_literal(RTEST(literal));
|
146
98
|
}
|
147
99
|
|
148
|
-
never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
|
100
|
+
VALUE never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
|
149
101
|
if (!NIL_P(never_nl)) {
|
150
|
-
re2_options
|
102
|
+
re2_options->set_never_nl(RTEST(never_nl));
|
151
103
|
}
|
152
104
|
|
153
|
-
case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
|
105
|
+
VALUE case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
|
154
106
|
if (!NIL_P(case_sensitive)) {
|
155
|
-
re2_options
|
107
|
+
re2_options->set_case_sensitive(RTEST(case_sensitive));
|
156
108
|
}
|
157
109
|
|
158
|
-
perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
|
110
|
+
VALUE perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
|
159
111
|
if (!NIL_P(perl_classes)) {
|
160
|
-
re2_options
|
112
|
+
re2_options->set_perl_classes(RTEST(perl_classes));
|
161
113
|
}
|
162
114
|
|
163
|
-
word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
|
115
|
+
VALUE word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
|
164
116
|
if (!NIL_P(word_boundary)) {
|
165
|
-
re2_options
|
117
|
+
re2_options->set_word_boundary(RTEST(word_boundary));
|
166
118
|
}
|
167
119
|
|
168
|
-
one_line = rb_hash_aref(options, ID2SYM(id_one_line));
|
120
|
+
VALUE one_line = rb_hash_aref(options, ID2SYM(id_one_line));
|
169
121
|
if (!NIL_P(one_line)) {
|
170
|
-
re2_options
|
122
|
+
re2_options->set_one_line(RTEST(one_line));
|
171
123
|
}
|
172
124
|
}
|
173
125
|
|
174
|
-
void re2_matchdata_mark(re2_matchdata* self) {
|
126
|
+
static void re2_matchdata_mark(re2_matchdata* self) {
|
175
127
|
rb_gc_mark(self->regexp);
|
176
128
|
rb_gc_mark(self->text);
|
177
129
|
}
|
178
130
|
|
179
|
-
void re2_matchdata_free(re2_matchdata* self) {
|
131
|
+
static void re2_matchdata_free(re2_matchdata* self) {
|
180
132
|
if (self->matches) {
|
181
133
|
delete[] self->matches;
|
182
134
|
}
|
183
135
|
free(self);
|
184
136
|
}
|
185
137
|
|
186
|
-
void re2_scanner_mark(re2_scanner* self) {
|
138
|
+
static void re2_scanner_mark(re2_scanner* self) {
|
187
139
|
rb_gc_mark(self->regexp);
|
188
140
|
rb_gc_mark(self->text);
|
189
141
|
}
|
190
142
|
|
191
|
-
void re2_scanner_free(re2_scanner* self) {
|
143
|
+
static void re2_scanner_free(re2_scanner* self) {
|
192
144
|
if (self->input) {
|
193
145
|
delete self->input;
|
194
146
|
}
|
195
147
|
free(self);
|
196
148
|
}
|
197
149
|
|
198
|
-
void re2_regexp_free(re2_pattern* self) {
|
150
|
+
static void re2_regexp_free(re2_pattern* self) {
|
199
151
|
if (self->pattern) {
|
200
152
|
delete self->pattern;
|
201
153
|
}
|
@@ -204,12 +156,14 @@ void re2_regexp_free(re2_pattern* self) {
|
|
204
156
|
|
205
157
|
static VALUE re2_matchdata_allocate(VALUE klass) {
|
206
158
|
re2_matchdata *m;
|
159
|
+
|
207
160
|
return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
|
208
161
|
re2_matchdata_free, m);
|
209
162
|
}
|
210
163
|
|
211
164
|
static VALUE re2_scanner_allocate(VALUE klass) {
|
212
165
|
re2_scanner *c;
|
166
|
+
|
213
167
|
return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
|
214
168
|
re2_scanner_free, c);
|
215
169
|
}
|
@@ -222,7 +176,7 @@ static VALUE re2_scanner_allocate(VALUE klass) {
|
|
222
176
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
223
177
|
* m.string #=> "bob 123"
|
224
178
|
*/
|
225
|
-
static VALUE re2_matchdata_string(VALUE self) {
|
179
|
+
static VALUE re2_matchdata_string(const VALUE self) {
|
226
180
|
re2_matchdata *m;
|
227
181
|
Data_Get_Struct(self, re2_matchdata, m);
|
228
182
|
|
@@ -237,7 +191,7 @@ static VALUE re2_matchdata_string(VALUE self) {
|
|
237
191
|
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
238
192
|
* c.string #=> "foo"
|
239
193
|
*/
|
240
|
-
static VALUE re2_scanner_string(VALUE self) {
|
194
|
+
static VALUE re2_scanner_string(const VALUE self) {
|
241
195
|
re2_scanner *c;
|
242
196
|
Data_Get_Struct(self, re2_scanner, c);
|
243
197
|
|
@@ -252,7 +206,7 @@ static VALUE re2_scanner_string(VALUE self) {
|
|
252
206
|
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
253
207
|
* c.eof? #=> true
|
254
208
|
*/
|
255
|
-
static VALUE re2_scanner_eof(VALUE self) {
|
209
|
+
static VALUE re2_scanner_eof(const VALUE self) {
|
256
210
|
re2_scanner *c;
|
257
211
|
Data_Get_Struct(self, re2_scanner, c);
|
258
212
|
|
@@ -274,7 +228,7 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
274
228
|
re2_scanner *c;
|
275
229
|
Data_Get_Struct(self, re2_scanner, c);
|
276
230
|
|
277
|
-
c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text));
|
231
|
+
c->input = new(std::nothrow) re2::StringPiece(StringValuePtr(c->text));
|
278
232
|
c->eof = false;
|
279
233
|
|
280
234
|
return self;
|
@@ -284,6 +238,10 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
284
238
|
* Scan the given text incrementally for matches, returning an array of
|
285
239
|
* matches on each subsequent call. Returns nil if no matches are found.
|
286
240
|
*
|
241
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
242
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
243
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
244
|
+
*
|
287
245
|
* @return [Array<String>] the matches.
|
288
246
|
* @example
|
289
247
|
* s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
@@ -291,45 +249,41 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
291
249
|
* s.scan #=> ["bar"]
|
292
250
|
*/
|
293
251
|
static VALUE re2_scanner_scan(VALUE self) {
|
294
|
-
int i;
|
295
|
-
size_t original_input_size, new_input_size;
|
296
|
-
bool input_advanced;
|
297
252
|
re2_pattern *p;
|
298
253
|
re2_scanner *c;
|
299
|
-
VALUE result;
|
300
254
|
|
301
255
|
Data_Get_Struct(self, re2_scanner, c);
|
302
256
|
Data_Get_Struct(c->regexp, re2_pattern, p);
|
303
257
|
|
304
|
-
vector<RE2::Arg> argv(c->number_of_capturing_groups);
|
305
|
-
vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
306
|
-
vector<string> matches(c->number_of_capturing_groups);
|
258
|
+
std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
|
259
|
+
std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
260
|
+
std::vector<std::string> matches(c->number_of_capturing_groups);
|
307
261
|
|
308
262
|
if (c->eof) {
|
309
263
|
return Qnil;
|
310
264
|
}
|
311
265
|
|
312
|
-
original_input_size = c->input->size();
|
266
|
+
re2::StringPiece::size_type original_input_size = c->input->size();
|
313
267
|
|
314
|
-
for (i = 0; i < c->number_of_capturing_groups; i
|
315
|
-
matches[i] = "";
|
268
|
+
for (int i = 0; i < c->number_of_capturing_groups; ++i) {
|
316
269
|
argv[i] = &matches[i];
|
317
270
|
args[i] = &argv[i];
|
318
271
|
}
|
319
272
|
|
320
273
|
if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0],
|
321
274
|
c->number_of_capturing_groups)) {
|
322
|
-
|
323
|
-
|
324
|
-
input_advanced = new_input_size < original_input_size;
|
275
|
+
re2::StringPiece::size_type new_input_size = c->input->size();
|
276
|
+
bool input_advanced = new_input_size < original_input_size;
|
325
277
|
|
326
|
-
|
278
|
+
VALUE result = rb_ary_new2(c->number_of_capturing_groups);
|
279
|
+
|
280
|
+
for (int i = 0; i < c->number_of_capturing_groups; ++i) {
|
327
281
|
if (matches[i].empty()) {
|
328
282
|
rb_ary_push(result, Qnil);
|
329
283
|
} else {
|
330
|
-
rb_ary_push(result,
|
284
|
+
rb_ary_push(result, encoded_str_new(matches[i].data(),
|
331
285
|
matches[i].size(),
|
332
|
-
p->pattern->options().encoding()
|
286
|
+
p->pattern->options().encoding()));
|
333
287
|
}
|
334
288
|
}
|
335
289
|
|
@@ -340,47 +294,40 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
340
294
|
if (!input_advanced && new_input_size > 0) {
|
341
295
|
c->input->remove_prefix(1);
|
342
296
|
}
|
297
|
+
|
298
|
+
return result;
|
343
299
|
} else {
|
344
|
-
|
300
|
+
return Qnil;
|
345
301
|
}
|
346
|
-
|
347
|
-
return result;
|
348
302
|
}
|
349
303
|
|
350
304
|
/*
|
351
305
|
* Retrieve a matchdata by index or name.
|
352
306
|
*/
|
353
|
-
re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
|
354
|
-
int id;
|
307
|
+
static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
355
308
|
re2_matchdata *m;
|
356
309
|
re2_pattern *p;
|
357
|
-
map<string, int> groups;
|
358
|
-
string name;
|
359
|
-
re2::StringPiece *match;
|
360
310
|
|
361
311
|
Data_Get_Struct(self, re2_matchdata, m);
|
362
312
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
363
313
|
|
314
|
+
int id;
|
315
|
+
|
364
316
|
if (FIXNUM_P(idx)) {
|
365
317
|
id = FIX2INT(idx);
|
366
318
|
} else {
|
367
|
-
|
368
|
-
|
369
|
-
} else {
|
370
|
-
name = StringValuePtr(idx);
|
371
|
-
}
|
372
|
-
|
373
|
-
groups = p->pattern->NamedCapturingGroups();
|
319
|
+
const char *name = SYMBOL_P(idx) ? rb_id2name(SYM2ID(idx)) : StringValuePtr(idx);
|
320
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
374
321
|
|
375
|
-
if (groups.
|
376
|
-
id =
|
322
|
+
if (std::map<std::string, int>::const_iterator search = groups.find(name); search != groups.end()) {
|
323
|
+
id = search->second;
|
377
324
|
} else {
|
378
325
|
return NULL;
|
379
326
|
}
|
380
327
|
}
|
381
328
|
|
382
329
|
if (id >= 0 && id < m->number_of_matches) {
|
383
|
-
match = &m->matches[id];
|
330
|
+
re2::StringPiece *match = &m->matches[id];
|
384
331
|
|
385
332
|
if (!match->empty()) {
|
386
333
|
return match;
|
@@ -399,7 +346,7 @@ re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
|
|
399
346
|
* m.size #=> 2
|
400
347
|
* m.length #=> 2
|
401
348
|
*/
|
402
|
-
static VALUE re2_matchdata_size(VALUE self) {
|
349
|
+
static VALUE re2_matchdata_size(const VALUE self) {
|
403
350
|
re2_matchdata *m;
|
404
351
|
Data_Get_Struct(self, re2_matchdata, m);
|
405
352
|
|
@@ -416,23 +363,18 @@ static VALUE re2_matchdata_size(VALUE self) {
|
|
416
363
|
* m.begin(0) #=> 1
|
417
364
|
* m.begin(1) #=> 4
|
418
365
|
*/
|
419
|
-
static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
|
366
|
+
static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
420
367
|
re2_matchdata *m;
|
421
|
-
re2_pattern *p;
|
422
|
-
re2::StringPiece *match;
|
423
|
-
long offset;
|
424
368
|
|
425
369
|
Data_Get_Struct(self, re2_matchdata, m);
|
426
|
-
Data_Get_Struct(m->regexp, re2_pattern, p);
|
427
370
|
|
428
|
-
match = re2_matchdata_find_match(n, self);
|
371
|
+
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
429
372
|
if (match == NULL) {
|
430
373
|
return Qnil;
|
431
374
|
} else {
|
432
|
-
offset =
|
375
|
+
long offset = match->data() - StringValuePtr(m->text);
|
433
376
|
|
434
|
-
return
|
435
|
-
p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
|
377
|
+
return LONG2NUM(rb_str_sublen(StringValue(m->text), offset));
|
436
378
|
}
|
437
379
|
}
|
438
380
|
|
@@ -446,24 +388,18 @@ static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
|
|
446
388
|
* m.end(0) #=> 9
|
447
389
|
* m.end(1) #=> 7
|
448
390
|
*/
|
449
|
-
static VALUE re2_matchdata_end(VALUE self, VALUE n) {
|
391
|
+
static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
450
392
|
re2_matchdata *m;
|
451
|
-
re2_pattern *p;
|
452
|
-
re2::StringPiece *match;
|
453
|
-
long offset;
|
454
393
|
|
455
394
|
Data_Get_Struct(self, re2_matchdata, m);
|
456
|
-
Data_Get_Struct(m->regexp, re2_pattern, p);
|
457
|
-
|
458
|
-
match = re2_matchdata_find_match(n, self);
|
459
395
|
|
396
|
+
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
460
397
|
if (match == NULL) {
|
461
398
|
return Qnil;
|
462
399
|
} else {
|
463
|
-
offset =
|
400
|
+
long offset = (match->data() - StringValuePtr(m->text)) + match->size();
|
464
401
|
|
465
|
-
return
|
466
|
-
p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
|
402
|
+
return LONG2NUM(rb_str_sublen(StringValue(m->text), offset));
|
467
403
|
}
|
468
404
|
}
|
469
405
|
|
@@ -475,9 +411,10 @@ static VALUE re2_matchdata_end(VALUE self, VALUE n) {
|
|
475
411
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
476
412
|
* m.regexp #=> #<RE2::Regexp /(\d+)/>
|
477
413
|
*/
|
478
|
-
static VALUE re2_matchdata_regexp(VALUE self) {
|
414
|
+
static VALUE re2_matchdata_regexp(const VALUE self) {
|
479
415
|
re2_matchdata *m;
|
480
416
|
Data_Get_Struct(self, re2_matchdata, m);
|
417
|
+
|
481
418
|
return m->regexp;
|
482
419
|
}
|
483
420
|
|
@@ -489,7 +426,7 @@ static VALUE re2_matchdata_regexp(VALUE self) {
|
|
489
426
|
* c = RE2::Regexp.new('(\d+)').scan("bob 123")
|
490
427
|
* c.regexp #=> #<RE2::Regexp /(\d+)/>
|
491
428
|
*/
|
492
|
-
static VALUE re2_scanner_regexp(VALUE self) {
|
429
|
+
static VALUE re2_scanner_regexp(const VALUE self) {
|
493
430
|
re2_scanner *c;
|
494
431
|
Data_Get_Struct(self, re2_scanner, c);
|
495
432
|
|
@@ -498,46 +435,47 @@ static VALUE re2_scanner_regexp(VALUE self) {
|
|
498
435
|
|
499
436
|
static VALUE re2_regexp_allocate(VALUE klass) {
|
500
437
|
re2_pattern *p;
|
438
|
+
|
501
439
|
return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
|
502
440
|
}
|
503
441
|
|
504
442
|
/*
|
505
443
|
* Returns the array of matches.
|
506
444
|
*
|
445
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
446
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
447
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
448
|
+
*
|
507
449
|
* @return [Array<String, nil>] the array of matches
|
508
450
|
* @example
|
509
451
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
510
452
|
* m.to_a #=> ["123", "123"]
|
511
453
|
*/
|
512
|
-
static VALUE re2_matchdata_to_a(VALUE self) {
|
513
|
-
int i;
|
454
|
+
static VALUE re2_matchdata_to_a(const VALUE self) {
|
514
455
|
re2_matchdata *m;
|
515
456
|
re2_pattern *p;
|
516
|
-
re2::StringPiece *match;
|
517
|
-
VALUE array;
|
518
457
|
|
519
458
|
Data_Get_Struct(self, re2_matchdata, m);
|
520
459
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
521
460
|
|
522
|
-
array = rb_ary_new2(m->number_of_matches);
|
523
|
-
for (i = 0; i < m->number_of_matches; i
|
524
|
-
match = &m->matches[i];
|
461
|
+
VALUE array = rb_ary_new2(m->number_of_matches);
|
462
|
+
for (int i = 0; i < m->number_of_matches; ++i) {
|
463
|
+
re2::StringPiece *match = &m->matches[i];
|
525
464
|
|
526
465
|
if (match->empty()) {
|
527
466
|
rb_ary_push(array, Qnil);
|
528
467
|
} else {
|
529
|
-
rb_ary_push(array,
|
530
|
-
p->pattern->options().encoding()
|
468
|
+
rb_ary_push(array, encoded_str_new(match->data(), match->size(),
|
469
|
+
p->pattern->options().encoding()));
|
531
470
|
}
|
532
471
|
}
|
533
472
|
|
534
473
|
return array;
|
535
474
|
}
|
536
475
|
|
537
|
-
static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
|
476
|
+
static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
|
538
477
|
re2_matchdata *m;
|
539
478
|
re2_pattern *p;
|
540
|
-
re2::StringPiece *match;
|
541
479
|
|
542
480
|
Data_Get_Struct(self, re2_matchdata, m);
|
543
481
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
@@ -545,32 +483,28 @@ static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
|
|
545
483
|
if (nth < 0 || nth >= m->number_of_matches) {
|
546
484
|
return Qnil;
|
547
485
|
} else {
|
548
|
-
match = &m->matches[nth];
|
486
|
+
re2::StringPiece *match = &m->matches[nth];
|
549
487
|
|
550
488
|
if (match->empty()) {
|
551
489
|
return Qnil;
|
552
490
|
} else {
|
553
|
-
return
|
554
|
-
p->pattern->options().encoding()
|
491
|
+
return encoded_str_new(match->data(), match->size(),
|
492
|
+
p->pattern->options().encoding());
|
555
493
|
}
|
556
494
|
}
|
557
495
|
}
|
558
496
|
|
559
|
-
static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
|
560
|
-
int idx;
|
497
|
+
static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
|
561
498
|
re2_matchdata *m;
|
562
499
|
re2_pattern *p;
|
563
|
-
map<string, int> groups;
|
564
|
-
string name_as_string(name);
|
565
500
|
|
566
501
|
Data_Get_Struct(self, re2_matchdata, m);
|
567
502
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
568
503
|
|
569
|
-
groups = p->pattern->NamedCapturingGroups();
|
504
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
570
505
|
|
571
|
-
if (groups.
|
572
|
-
|
573
|
-
return re2_matchdata_nth_match(idx, self);
|
506
|
+
if (std::map<std::string, int>::const_iterator search = groups.find(name); search != groups.end()) {
|
507
|
+
return re2_matchdata_nth_match(search->second, self);
|
574
508
|
} else {
|
575
509
|
return Qnil;
|
576
510
|
}
|
@@ -579,6 +513,10 @@ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
|
|
579
513
|
/*
|
580
514
|
* Retrieve zero, one or more matches by index or name.
|
581
515
|
*
|
516
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
517
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
518
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
519
|
+
*
|
582
520
|
* @return [Array<String, nil>, String, Boolean]
|
583
521
|
*
|
584
522
|
* @overload [](index)
|
@@ -619,7 +557,7 @@ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
|
|
619
557
|
* m["number"] #=> "123"
|
620
558
|
* m[:number] #=> "123"
|
621
559
|
*/
|
622
|
-
static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
|
560
|
+
static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
|
623
561
|
VALUE idx, rest;
|
624
562
|
rb_scan_args(argc, argv, "11", &idx, &rest);
|
625
563
|
|
@@ -639,38 +577,40 @@ static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
|
|
639
577
|
*
|
640
578
|
* @return [String] the entire matched string
|
641
579
|
*/
|
642
|
-
static VALUE re2_matchdata_to_s(VALUE self) {
|
580
|
+
static VALUE re2_matchdata_to_s(const VALUE self) {
|
643
581
|
return re2_matchdata_nth_match(0, self);
|
644
582
|
}
|
645
583
|
|
646
584
|
/*
|
647
585
|
* Returns a printable version of the match.
|
648
586
|
*
|
587
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
588
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
589
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
590
|
+
*
|
649
591
|
* @return [String] a printable version of the match
|
650
592
|
* @example
|
651
593
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
652
594
|
* m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
|
653
595
|
*/
|
654
|
-
static VALUE re2_matchdata_inspect(VALUE self) {
|
655
|
-
int i;
|
596
|
+
static VALUE re2_matchdata_inspect(const VALUE self) {
|
656
597
|
re2_matchdata *m;
|
657
598
|
re2_pattern *p;
|
658
|
-
VALUE match, result;
|
659
|
-
ostringstream output;
|
660
599
|
|
661
600
|
Data_Get_Struct(self, re2_matchdata, m);
|
662
601
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
663
602
|
|
603
|
+
std::ostringstream output;
|
664
604
|
output << "#<RE2::MatchData";
|
665
605
|
|
666
|
-
for (i = 0; i < m->number_of_matches; i
|
606
|
+
for (int i = 0; i < m->number_of_matches; ++i) {
|
667
607
|
output << " ";
|
668
608
|
|
669
609
|
if (i > 0) {
|
670
610
|
output << i << ":";
|
671
611
|
}
|
672
612
|
|
673
|
-
match = re2_matchdata_nth_match(i, self);
|
613
|
+
VALUE match = re2_matchdata_nth_match(i, self);
|
674
614
|
|
675
615
|
if (match == Qnil) {
|
676
616
|
output << "nil";
|
@@ -681,15 +621,17 @@ static VALUE re2_matchdata_inspect(VALUE self) {
|
|
681
621
|
|
682
622
|
output << ">";
|
683
623
|
|
684
|
-
|
685
|
-
p->pattern->options().encoding()
|
686
|
-
|
687
|
-
return result;
|
624
|
+
return encoded_str_new(output.str().data(), output.str().length(),
|
625
|
+
p->pattern->options().encoding());
|
688
626
|
}
|
689
627
|
|
690
628
|
/*
|
691
629
|
* Returns the array of submatches for pattern matching.
|
692
630
|
*
|
631
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
632
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
633
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
634
|
+
*
|
693
635
|
* @return [Array<String, nil>] the array of submatches
|
694
636
|
* @example
|
695
637
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
@@ -703,25 +645,22 @@ static VALUE re2_matchdata_inspect(VALUE self) {
|
|
703
645
|
* puts "Unrecognised match"
|
704
646
|
* end
|
705
647
|
*/
|
706
|
-
static VALUE re2_matchdata_deconstruct(VALUE self) {
|
707
|
-
int i;
|
648
|
+
static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
708
649
|
re2_matchdata *m;
|
709
650
|
re2_pattern *p;
|
710
|
-
re2::StringPiece *match;
|
711
|
-
VALUE array;
|
712
651
|
|
713
652
|
Data_Get_Struct(self, re2_matchdata, m);
|
714
653
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
715
654
|
|
716
|
-
array = rb_ary_new2(m->number_of_matches - 1);
|
717
|
-
for (i = 1; i < m->number_of_matches; i
|
718
|
-
match = &m->matches[i];
|
655
|
+
VALUE array = rb_ary_new2(m->number_of_matches - 1);
|
656
|
+
for (int i = 1; i < m->number_of_matches; ++i) {
|
657
|
+
re2::StringPiece *match = &m->matches[i];
|
719
658
|
|
720
659
|
if (match->empty()) {
|
721
660
|
rb_ary_push(array, Qnil);
|
722
661
|
} else {
|
723
|
-
rb_ary_push(array,
|
724
|
-
p->pattern->options().encoding()
|
662
|
+
rb_ary_push(array, encoded_str_new(match->data(), match->size(),
|
663
|
+
p->pattern->options().encoding()));
|
725
664
|
}
|
726
665
|
}
|
727
666
|
|
@@ -735,6 +674,10 @@ static VALUE re2_matchdata_deconstruct(VALUE self) {
|
|
735
674
|
* more keys than there are capturing groups. Given keys will populate the hash in
|
736
675
|
* order but an invalid name will cause the hash to be immediately returned.
|
737
676
|
*
|
677
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
678
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
679
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
680
|
+
*
|
738
681
|
* @return [Hash] a hash of capturing group names to submatches
|
739
682
|
* @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
|
740
683
|
* @example
|
@@ -752,40 +695,36 @@ static VALUE re2_matchdata_deconstruct(VALUE self) {
|
|
752
695
|
* puts "Unrecognised match"
|
753
696
|
* end
|
754
697
|
*/
|
755
|
-
static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
|
756
|
-
int i;
|
757
|
-
VALUE capturing_groups, key;
|
698
|
+
static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys) {
|
758
699
|
re2_matchdata *m;
|
759
700
|
re2_pattern *p;
|
760
|
-
map<string, int> groups;
|
761
|
-
map<string, int>::iterator iterator;
|
762
701
|
|
763
702
|
Data_Get_Struct(self, re2_matchdata, m);
|
764
703
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
765
704
|
|
766
|
-
groups = p->pattern->NamedCapturingGroups();
|
767
|
-
capturing_groups = rb_hash_new();
|
705
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
706
|
+
VALUE capturing_groups = rb_hash_new();
|
768
707
|
|
769
708
|
if (NIL_P(keys)) {
|
770
|
-
for (
|
709
|
+
for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
|
771
710
|
rb_hash_aset(capturing_groups,
|
772
|
-
ID2SYM(rb_intern(
|
773
|
-
re2_matchdata_nth_match(
|
711
|
+
ID2SYM(rb_intern(it->first.data())),
|
712
|
+
re2_matchdata_nth_match(it->second, self));
|
774
713
|
}
|
775
714
|
} else {
|
776
715
|
Check_Type(keys, T_ARRAY);
|
777
716
|
|
778
717
|
if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
|
779
|
-
for (i = 0; i < RARRAY_LEN(keys); i
|
780
|
-
key = rb_ary_entry(keys, i);
|
718
|
+
for (int i = 0; i < RARRAY_LEN(keys); ++i) {
|
719
|
+
VALUE key = rb_ary_entry(keys, i);
|
781
720
|
Check_Type(key, T_SYMBOL);
|
782
|
-
|
721
|
+
const char *name = rb_id2name(SYM2ID(key));
|
783
722
|
|
784
|
-
if (groups.
|
723
|
+
if (std::map<std::string, int>::const_iterator search = groups.find(name); search != groups.end()) {
|
724
|
+
rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(search->second, self));
|
725
|
+
} else {
|
785
726
|
break;
|
786
727
|
}
|
787
|
-
|
788
|
-
rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(groups[name], self));
|
789
728
|
}
|
790
729
|
}
|
791
730
|
}
|
@@ -802,6 +741,7 @@ static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
|
|
802
741
|
*/
|
803
742
|
static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
|
804
743
|
UNUSED(self);
|
744
|
+
|
805
745
|
return rb_class_new_instance(argc, argv, re2_cRegexp);
|
806
746
|
}
|
807
747
|
|
@@ -849,11 +789,11 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
849
789
|
|
850
790
|
if (RTEST(options)) {
|
851
791
|
RE2::Options re2_options;
|
852
|
-
parse_re2_options(re2_options, options);
|
792
|
+
parse_re2_options(&re2_options, options);
|
853
793
|
|
854
|
-
p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
|
794
|
+
p->pattern = new(std::nothrow) RE2(StringValuePtr(pattern), re2_options);
|
855
795
|
} else {
|
856
|
-
p->pattern = new(nothrow) RE2(StringValuePtr(pattern));
|
796
|
+
p->pattern = new(std::nothrow) RE2(StringValuePtr(pattern));
|
857
797
|
}
|
858
798
|
|
859
799
|
if (p->pattern == 0) {
|
@@ -866,40 +806,47 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
866
806
|
/*
|
867
807
|
* Returns a printable version of the regular expression +re2+.
|
868
808
|
*
|
809
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
810
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
811
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
812
|
+
*
|
869
813
|
* @return [String] a printable version of the regular expression
|
870
814
|
* @example
|
871
815
|
* re2 = RE2::Regexp.new("woo?")
|
872
816
|
* re2.inspect #=> "#<RE2::Regexp /woo?/>"
|
873
817
|
*/
|
874
|
-
static VALUE re2_regexp_inspect(VALUE self) {
|
818
|
+
static VALUE re2_regexp_inspect(const VALUE self) {
|
875
819
|
re2_pattern *p;
|
876
|
-
VALUE result;
|
877
|
-
ostringstream output;
|
878
820
|
|
879
821
|
Data_Get_Struct(self, re2_pattern, p);
|
880
822
|
|
881
|
-
|
823
|
+
std::ostringstream output;
|
882
824
|
|
883
|
-
|
884
|
-
p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
|
825
|
+
output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
|
885
826
|
|
886
|
-
return
|
827
|
+
return encoded_str_new(output.str().data(), output.str().length(),
|
828
|
+
p->pattern->options().encoding());
|
887
829
|
}
|
888
830
|
|
889
831
|
/*
|
890
832
|
* Returns a string version of the regular expression +re2+.
|
891
833
|
*
|
834
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
835
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
836
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
837
|
+
*
|
892
838
|
* @return [String] a string version of the regular expression
|
893
839
|
* @example
|
894
840
|
* re2 = RE2::Regexp.new("woo?")
|
895
841
|
* re2.to_s #=> "woo?"
|
896
842
|
*/
|
897
|
-
static VALUE re2_regexp_to_s(VALUE self) {
|
843
|
+
static VALUE re2_regexp_to_s(const VALUE self) {
|
898
844
|
re2_pattern *p;
|
899
845
|
Data_Get_Struct(self, re2_pattern, p);
|
900
|
-
|
846
|
+
|
847
|
+
return encoded_str_new(p->pattern->pattern().data(),
|
901
848
|
p->pattern->pattern().size(),
|
902
|
-
p->pattern->options().encoding()
|
849
|
+
p->pattern->options().encoding());
|
903
850
|
}
|
904
851
|
|
905
852
|
/*
|
@@ -911,9 +858,10 @@ static VALUE re2_regexp_to_s(VALUE self) {
|
|
911
858
|
* re2 = RE2::Regexp.new("woo?")
|
912
859
|
* re2.ok? #=> true
|
913
860
|
*/
|
914
|
-
static VALUE re2_regexp_ok(VALUE self) {
|
861
|
+
static VALUE re2_regexp_ok(const VALUE self) {
|
915
862
|
re2_pattern *p;
|
916
863
|
Data_Get_Struct(self, re2_pattern, p);
|
864
|
+
|
917
865
|
return BOOL2RUBY(p->pattern->ok());
|
918
866
|
}
|
919
867
|
|
@@ -926,9 +874,10 @@ static VALUE re2_regexp_ok(VALUE self) {
|
|
926
874
|
* re2 = RE2::Regexp.new("woo?", :utf8 => true)
|
927
875
|
* re2.utf8? #=> true
|
928
876
|
*/
|
929
|
-
static VALUE re2_regexp_utf8(VALUE self) {
|
877
|
+
static VALUE re2_regexp_utf8(const VALUE self) {
|
930
878
|
re2_pattern *p;
|
931
879
|
Data_Get_Struct(self, re2_pattern, p);
|
880
|
+
|
932
881
|
return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
|
933
882
|
}
|
934
883
|
|
@@ -941,9 +890,10 @@ static VALUE re2_regexp_utf8(VALUE self) {
|
|
941
890
|
* re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
|
942
891
|
* re2.posix_syntax? #=> true
|
943
892
|
*/
|
944
|
-
static VALUE re2_regexp_posix_syntax(VALUE self) {
|
893
|
+
static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
945
894
|
re2_pattern *p;
|
946
895
|
Data_Get_Struct(self, re2_pattern, p);
|
896
|
+
|
947
897
|
return BOOL2RUBY(p->pattern->options().posix_syntax());
|
948
898
|
}
|
949
899
|
|
@@ -956,9 +906,10 @@ static VALUE re2_regexp_posix_syntax(VALUE self) {
|
|
956
906
|
* re2 = RE2::Regexp.new("woo?", :longest_match => true)
|
957
907
|
* re2.longest_match? #=> true
|
958
908
|
*/
|
959
|
-
static VALUE re2_regexp_longest_match(VALUE self) {
|
909
|
+
static VALUE re2_regexp_longest_match(const VALUE self) {
|
960
910
|
re2_pattern *p;
|
961
911
|
Data_Get_Struct(self, re2_pattern, p);
|
912
|
+
|
962
913
|
return BOOL2RUBY(p->pattern->options().longest_match());
|
963
914
|
}
|
964
915
|
|
@@ -971,9 +922,10 @@ static VALUE re2_regexp_longest_match(VALUE self) {
|
|
971
922
|
* re2 = RE2::Regexp.new("woo?", :log_errors => true)
|
972
923
|
* re2.log_errors? #=> true
|
973
924
|
*/
|
974
|
-
static VALUE re2_regexp_log_errors(VALUE self) {
|
925
|
+
static VALUE re2_regexp_log_errors(const VALUE self) {
|
975
926
|
re2_pattern *p;
|
976
927
|
Data_Get_Struct(self, re2_pattern, p);
|
928
|
+
|
977
929
|
return BOOL2RUBY(p->pattern->options().log_errors());
|
978
930
|
}
|
979
931
|
|
@@ -986,9 +938,10 @@ static VALUE re2_regexp_log_errors(VALUE self) {
|
|
986
938
|
* re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
|
987
939
|
* re2.max_mem #=> 1024
|
988
940
|
*/
|
989
|
-
static VALUE re2_regexp_max_mem(VALUE self) {
|
941
|
+
static VALUE re2_regexp_max_mem(const VALUE self) {
|
990
942
|
re2_pattern *p;
|
991
943
|
Data_Get_Struct(self, re2_pattern, p);
|
944
|
+
|
992
945
|
return INT2FIX(p->pattern->options().max_mem());
|
993
946
|
}
|
994
947
|
|
@@ -1001,9 +954,10 @@ static VALUE re2_regexp_max_mem(VALUE self) {
|
|
1001
954
|
* re2 = RE2::Regexp.new("woo?", :literal => true)
|
1002
955
|
* re2.literal? #=> true
|
1003
956
|
*/
|
1004
|
-
static VALUE re2_regexp_literal(VALUE self) {
|
957
|
+
static VALUE re2_regexp_literal(const VALUE self) {
|
1005
958
|
re2_pattern *p;
|
1006
959
|
Data_Get_Struct(self, re2_pattern, p);
|
960
|
+
|
1007
961
|
return BOOL2RUBY(p->pattern->options().literal());
|
1008
962
|
}
|
1009
963
|
|
@@ -1016,9 +970,10 @@ static VALUE re2_regexp_literal(VALUE self) {
|
|
1016
970
|
* re2 = RE2::Regexp.new("woo?", :never_nl => true)
|
1017
971
|
* re2.never_nl? #=> true
|
1018
972
|
*/
|
1019
|
-
static VALUE re2_regexp_never_nl(VALUE self) {
|
973
|
+
static VALUE re2_regexp_never_nl(const VALUE self) {
|
1020
974
|
re2_pattern *p;
|
1021
975
|
Data_Get_Struct(self, re2_pattern, p);
|
976
|
+
|
1022
977
|
return BOOL2RUBY(p->pattern->options().never_nl());
|
1023
978
|
}
|
1024
979
|
|
@@ -1031,9 +986,10 @@ static VALUE re2_regexp_never_nl(VALUE self) {
|
|
1031
986
|
* re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
|
1032
987
|
* re2.case_sensitive? #=> true
|
1033
988
|
*/
|
1034
|
-
static VALUE re2_regexp_case_sensitive(VALUE self) {
|
989
|
+
static VALUE re2_regexp_case_sensitive(const VALUE self) {
|
1035
990
|
re2_pattern *p;
|
1036
991
|
Data_Get_Struct(self, re2_pattern, p);
|
992
|
+
|
1037
993
|
return BOOL2RUBY(p->pattern->options().case_sensitive());
|
1038
994
|
}
|
1039
995
|
|
@@ -1047,7 +1003,7 @@ static VALUE re2_regexp_case_sensitive(VALUE self) {
|
|
1047
1003
|
* re2.case_insensitive? #=> false
|
1048
1004
|
* re2.casefold? #=> false
|
1049
1005
|
*/
|
1050
|
-
static VALUE re2_regexp_case_insensitive(VALUE self) {
|
1006
|
+
static VALUE re2_regexp_case_insensitive(const VALUE self) {
|
1051
1007
|
return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
|
1052
1008
|
}
|
1053
1009
|
|
@@ -1060,9 +1016,10 @@ static VALUE re2_regexp_case_insensitive(VALUE self) {
|
|
1060
1016
|
* re2 = RE2::Regexp.new("woo?", :perl_classes => true)
|
1061
1017
|
* re2.perl_classes? #=> true
|
1062
1018
|
*/
|
1063
|
-
static VALUE re2_regexp_perl_classes(VALUE self) {
|
1019
|
+
static VALUE re2_regexp_perl_classes(const VALUE self) {
|
1064
1020
|
re2_pattern *p;
|
1065
1021
|
Data_Get_Struct(self, re2_pattern, p);
|
1022
|
+
|
1066
1023
|
return BOOL2RUBY(p->pattern->options().perl_classes());
|
1067
1024
|
}
|
1068
1025
|
|
@@ -1075,9 +1032,10 @@ static VALUE re2_regexp_perl_classes(VALUE self) {
|
|
1075
1032
|
* re2 = RE2::Regexp.new("woo?", :word_boundary => true)
|
1076
1033
|
* re2.word_boundary? #=> true
|
1077
1034
|
*/
|
1078
|
-
static VALUE re2_regexp_word_boundary(VALUE self) {
|
1035
|
+
static VALUE re2_regexp_word_boundary(const VALUE self) {
|
1079
1036
|
re2_pattern *p;
|
1080
1037
|
Data_Get_Struct(self, re2_pattern, p);
|
1038
|
+
|
1081
1039
|
return BOOL2RUBY(p->pattern->options().word_boundary());
|
1082
1040
|
}
|
1083
1041
|
|
@@ -1090,9 +1048,10 @@ static VALUE re2_regexp_word_boundary(VALUE self) {
|
|
1090
1048
|
* re2 = RE2::Regexp.new("woo?", :one_line => true)
|
1091
1049
|
* re2.one_line? #=> true
|
1092
1050
|
*/
|
1093
|
-
static VALUE re2_regexp_one_line(VALUE self) {
|
1051
|
+
static VALUE re2_regexp_one_line(const VALUE self) {
|
1094
1052
|
re2_pattern *p;
|
1095
1053
|
Data_Get_Struct(self, re2_pattern, p);
|
1054
|
+
|
1096
1055
|
return BOOL2RUBY(p->pattern->options().one_line());
|
1097
1056
|
}
|
1098
1057
|
|
@@ -1102,9 +1061,10 @@ static VALUE re2_regexp_one_line(VALUE self) {
|
|
1102
1061
|
*
|
1103
1062
|
* @return [String, nil] the error string or nil
|
1104
1063
|
*/
|
1105
|
-
static VALUE re2_regexp_error(VALUE self) {
|
1064
|
+
static VALUE re2_regexp_error(const VALUE self) {
|
1106
1065
|
re2_pattern *p;
|
1107
1066
|
Data_Get_Struct(self, re2_pattern, p);
|
1067
|
+
|
1108
1068
|
if (p->pattern->ok()) {
|
1109
1069
|
return Qnil;
|
1110
1070
|
} else {
|
@@ -1116,17 +1076,22 @@ static VALUE re2_regexp_error(VALUE self) {
|
|
1116
1076
|
* If the RE2 could not be created properly, returns
|
1117
1077
|
* the offending portion of the regexp otherwise returns nil.
|
1118
1078
|
*
|
1079
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1080
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1081
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1082
|
+
*
|
1119
1083
|
* @return [String, nil] the offending portion of the regexp or nil
|
1120
1084
|
*/
|
1121
|
-
static VALUE re2_regexp_error_arg(VALUE self) {
|
1085
|
+
static VALUE re2_regexp_error_arg(const VALUE self) {
|
1122
1086
|
re2_pattern *p;
|
1123
1087
|
Data_Get_Struct(self, re2_pattern, p);
|
1088
|
+
|
1124
1089
|
if (p->pattern->ok()) {
|
1125
1090
|
return Qnil;
|
1126
1091
|
} else {
|
1127
|
-
return
|
1092
|
+
return encoded_str_new(p->pattern->error_arg().data(),
|
1128
1093
|
p->pattern->error_arg().size(),
|
1129
|
-
p->pattern->options().encoding()
|
1094
|
+
p->pattern->options().encoding());
|
1130
1095
|
}
|
1131
1096
|
}
|
1132
1097
|
|
@@ -1137,9 +1102,10 @@ static VALUE re2_regexp_error_arg(VALUE self) {
|
|
1137
1102
|
*
|
1138
1103
|
* @return [Integer] the regexp "cost"
|
1139
1104
|
*/
|
1140
|
-
static VALUE re2_regexp_program_size(VALUE self) {
|
1105
|
+
static VALUE re2_regexp_program_size(const VALUE self) {
|
1141
1106
|
re2_pattern *p;
|
1142
1107
|
Data_Get_Struct(self, re2_pattern, p);
|
1108
|
+
|
1143
1109
|
return INT2FIX(p->pattern->ProgramSize());
|
1144
1110
|
}
|
1145
1111
|
|
@@ -1149,12 +1115,11 @@ static VALUE re2_regexp_program_size(VALUE self) {
|
|
1149
1115
|
*
|
1150
1116
|
* @return [Hash] the options
|
1151
1117
|
*/
|
1152
|
-
static VALUE re2_regexp_options(VALUE self) {
|
1153
|
-
VALUE options;
|
1118
|
+
static VALUE re2_regexp_options(const VALUE self) {
|
1154
1119
|
re2_pattern *p;
|
1155
1120
|
|
1156
1121
|
Data_Get_Struct(self, re2_pattern, p);
|
1157
|
-
options = rb_hash_new();
|
1122
|
+
VALUE options = rb_hash_new();
|
1158
1123
|
|
1159
1124
|
rb_hash_aset(options, ID2SYM(id_utf8),
|
1160
1125
|
BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8));
|
@@ -1202,33 +1167,34 @@ static VALUE re2_regexp_options(VALUE self) {
|
|
1202
1167
|
*
|
1203
1168
|
* @return [Integer] the number of capturing subpatterns
|
1204
1169
|
*/
|
1205
|
-
static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
|
1170
|
+
static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
1206
1171
|
re2_pattern *p;
|
1207
|
-
|
1208
1172
|
Data_Get_Struct(self, re2_pattern, p);
|
1173
|
+
|
1209
1174
|
return INT2FIX(p->pattern->NumberOfCapturingGroups());
|
1210
1175
|
}
|
1211
1176
|
|
1212
1177
|
/*
|
1213
1178
|
* Returns a hash of names to capturing indices of groups.
|
1214
1179
|
*
|
1180
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1181
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1182
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1183
|
+
*
|
1215
1184
|
* @return [Hash] a hash of names to capturing indices
|
1216
1185
|
*/
|
1217
|
-
static VALUE re2_regexp_named_capturing_groups(VALUE self) {
|
1218
|
-
VALUE capturing_groups;
|
1186
|
+
static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
|
1219
1187
|
re2_pattern *p;
|
1220
|
-
map<string, int> groups;
|
1221
|
-
map<string, int>::iterator iterator;
|
1222
1188
|
|
1223
1189
|
Data_Get_Struct(self, re2_pattern, p);
|
1224
|
-
groups = p->pattern->NamedCapturingGroups();
|
1225
|
-
capturing_groups = rb_hash_new();
|
1190
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
1191
|
+
VALUE capturing_groups = rb_hash_new();
|
1226
1192
|
|
1227
|
-
for (
|
1193
|
+
for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
|
1228
1194
|
rb_hash_aset(capturing_groups,
|
1229
|
-
|
1230
|
-
p->pattern->options().encoding()
|
1231
|
-
INT2FIX(
|
1195
|
+
encoded_str_new(it->first.data(), it->first.size(),
|
1196
|
+
p->pattern->options().encoding()),
|
1197
|
+
INT2FIX(it->second));
|
1232
1198
|
}
|
1233
1199
|
|
1234
1200
|
return capturing_groups;
|
@@ -1242,16 +1208,23 @@ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
|
|
1242
1208
|
* @return [Boolean, RE2::MatchData]
|
1243
1209
|
*
|
1244
1210
|
* @overload match(text)
|
1245
|
-
* Returns an {RE2::MatchData} containing the matching
|
1246
|
-
*
|
1247
|
-
*
|
1211
|
+
* Returns an {RE2::MatchData} containing the matching pattern and all
|
1212
|
+
* subpatterns resulting from looking for the regexp in +text+ if the pattern
|
1213
|
+
* contains capturing groups.
|
1214
|
+
*
|
1215
|
+
* Returns either true or false indicating whether a successful match was
|
1216
|
+
* made if the pattern contains no capturing groups.
|
1248
1217
|
*
|
1249
1218
|
* @param [String] text the text to search
|
1250
|
-
* @return [RE2::MatchData] the
|
1219
|
+
* @return [RE2::MatchData] if the pattern contains capturing groups
|
1220
|
+
* @return [Boolean] if the pattern does not contain capturing groups
|
1251
1221
|
* @raise [NoMemoryError] if there was not enough memory to allocate the matches
|
1252
|
-
* @example
|
1222
|
+
* @example Matching with capturing groups
|
1253
1223
|
* r = RE2::Regexp.new('w(o)(o)')
|
1254
1224
|
* r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
1225
|
+
* @example Matching without capturing groups
|
1226
|
+
* r = RE2::Regexp.new('woo')
|
1227
|
+
* r.match('woo') #=> true
|
1255
1228
|
*
|
1256
1229
|
* @overload match(text, 0)
|
1257
1230
|
* Returns either true or false indicating whether a
|
@@ -1279,20 +1252,20 @@ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
|
|
1279
1252
|
* r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
|
1280
1253
|
* r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
|
1281
1254
|
*/
|
1282
|
-
static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
1283
|
-
int n;
|
1284
|
-
bool matched;
|
1255
|
+
static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
1285
1256
|
re2_pattern *p;
|
1286
1257
|
re2_matchdata *m;
|
1287
|
-
VALUE text, number_of_matches
|
1258
|
+
VALUE text, number_of_matches;
|
1288
1259
|
|
1289
1260
|
rb_scan_args(argc, argv, "11", &text, &number_of_matches);
|
1290
1261
|
|
1291
1262
|
/* Ensure text is a string. */
|
1292
|
-
|
1263
|
+
StringValue(text);
|
1293
1264
|
|
1294
1265
|
Data_Get_Struct(self, re2_pattern, p);
|
1295
1266
|
|
1267
|
+
int n;
|
1268
|
+
|
1296
1269
|
if (RTEST(number_of_matches)) {
|
1297
1270
|
n = NUM2INT(number_of_matches);
|
1298
1271
|
|
@@ -1308,17 +1281,21 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1308
1281
|
}
|
1309
1282
|
|
1310
1283
|
if (n == 0) {
|
1311
|
-
|
1312
|
-
|
1284
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
1285
|
+
bool matched = p->pattern->Match(StringValuePtr(text), 0,
|
1286
|
+
RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
|
1287
|
+
#else
|
1288
|
+
bool matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED,
|
1289
|
+
0, 0);
|
1290
|
+
#endif
|
1313
1291
|
return BOOL2RUBY(matched);
|
1314
1292
|
} else {
|
1315
|
-
|
1316
1293
|
/* Because match returns the whole match as well. */
|
1317
1294
|
n += 1;
|
1318
1295
|
|
1319
|
-
matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
|
1296
|
+
VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
|
1320
1297
|
Data_Get_Struct(matchdata, re2_matchdata, m);
|
1321
|
-
m->matches = new(nothrow) re2::StringPiece[n];
|
1298
|
+
m->matches = new(std::nothrow) re2::StringPiece[n];
|
1322
1299
|
m->regexp = self;
|
1323
1300
|
m->text = rb_str_dup(text);
|
1324
1301
|
rb_str_freeze(m->text);
|
@@ -1330,10 +1307,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1330
1307
|
|
1331
1308
|
m->number_of_matches = n;
|
1332
1309
|
|
1333
|
-
|
1334
|
-
|
1335
|
-
|
1336
|
-
|
1310
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
1311
|
+
bool matched = p->pattern->Match(StringValuePtr(m->text), 0,
|
1312
|
+
RSTRING_LEN(m->text), RE2::UNANCHORED, m->matches, n);
|
1313
|
+
#else
|
1314
|
+
bool matched = p->pattern->Match(StringValuePtr(m->text), 0,
|
1315
|
+
RE2::UNANCHORED, m->matches, n);
|
1316
|
+
#endif
|
1337
1317
|
if (matched) {
|
1338
1318
|
return matchdata;
|
1339
1319
|
} else {
|
@@ -1348,10 +1328,8 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1348
1328
|
*
|
1349
1329
|
* @return [Boolean] whether the match was successful
|
1350
1330
|
*/
|
1351
|
-
static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
|
1352
|
-
VALUE argv[2];
|
1353
|
-
argv[0] = text;
|
1354
|
-
argv[1] = INT2FIX(0);
|
1331
|
+
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
1332
|
+
VALUE argv[2] = { text, INT2FIX(0) };
|
1355
1333
|
|
1356
1334
|
return re2_regexp_match(2, argv, self);
|
1357
1335
|
}
|
@@ -1362,16 +1340,15 @@ static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
|
|
1362
1340
|
* @example
|
1363
1341
|
* c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
1364
1342
|
*/
|
1365
|
-
static VALUE re2_regexp_scan(VALUE self, VALUE text) {
|
1343
|
+
static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
1366
1344
|
re2_pattern *p;
|
1367
1345
|
re2_scanner *c;
|
1368
|
-
VALUE scanner;
|
1369
1346
|
|
1370
1347
|
Data_Get_Struct(self, re2_pattern, p);
|
1371
|
-
scanner = rb_class_new_instance(0, 0, re2_cScanner);
|
1348
|
+
VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
|
1372
1349
|
Data_Get_Struct(scanner, re2_scanner, c);
|
1373
1350
|
|
1374
|
-
c->input = new(nothrow) re2::StringPiece(StringValuePtr(text));
|
1351
|
+
c->input = new(std::nothrow) re2::StringPiece(StringValuePtr(text));
|
1375
1352
|
c->regexp = self;
|
1376
1353
|
c->text = text;
|
1377
1354
|
|
@@ -1390,6 +1367,10 @@ static VALUE re2_regexp_scan(VALUE self, VALUE text) {
|
|
1390
1367
|
* Returns a copy of +str+ with the first occurrence +pattern+
|
1391
1368
|
* replaced with +rewrite+.
|
1392
1369
|
*
|
1370
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1371
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1372
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1373
|
+
*
|
1393
1374
|
* @param [String] str the string to modify
|
1394
1375
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1395
1376
|
* @param [String] rewrite the string to replace with
|
@@ -1404,29 +1385,33 @@ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
|
|
1404
1385
|
UNUSED(self);
|
1405
1386
|
re2_pattern *p;
|
1406
1387
|
|
1407
|
-
/*
|
1408
|
-
|
1388
|
+
/* Take a copy of str so it can be modified in-place by
|
1389
|
+
* RE2::Replace.
|
1390
|
+
*/
|
1391
|
+
std::string str_as_string(StringValuePtr(str));
|
1409
1392
|
|
1410
1393
|
/* Do the replacement. */
|
1411
1394
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
1412
1395
|
Data_Get_Struct(pattern, re2_pattern, p);
|
1413
1396
|
RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
|
1414
1397
|
|
1415
|
-
return
|
1416
|
-
p->pattern->options().encoding()
|
1398
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
1399
|
+
p->pattern->options().encoding());
|
1417
1400
|
} else {
|
1418
1401
|
RE2::Replace(&str_as_string, StringValuePtr(pattern),
|
1419
1402
|
StringValuePtr(rewrite));
|
1420
1403
|
|
1421
|
-
return
|
1422
|
-
pattern);
|
1404
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
|
1423
1405
|
}
|
1424
|
-
|
1425
1406
|
}
|
1426
1407
|
|
1427
1408
|
/*
|
1428
1409
|
* Return a copy of +str+ with +pattern+ replaced by +rewrite+.
|
1429
1410
|
*
|
1411
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1412
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1413
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1414
|
+
*
|
1430
1415
|
* @param [String] str the string to modify
|
1431
1416
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1432
1417
|
* @param [String] rewrite the string to replace with
|
@@ -1440,23 +1425,24 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
|
|
1440
1425
|
VALUE rewrite) {
|
1441
1426
|
UNUSED(self);
|
1442
1427
|
|
1443
|
-
/*
|
1428
|
+
/* Take a copy of str so it can be modified in-place by
|
1429
|
+
* RE2::GlobalReplace.
|
1430
|
+
*/
|
1444
1431
|
re2_pattern *p;
|
1445
|
-
string str_as_string(StringValuePtr(str));
|
1432
|
+
std::string str_as_string(StringValuePtr(str));
|
1446
1433
|
|
1447
1434
|
/* Do the replacement. */
|
1448
1435
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
1449
1436
|
Data_Get_Struct(pattern, re2_pattern, p);
|
1450
1437
|
RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
|
1451
1438
|
|
1452
|
-
return
|
1453
|
-
p->pattern->options().encoding()
|
1439
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
1440
|
+
p->pattern->options().encoding());
|
1454
1441
|
} else {
|
1455
1442
|
RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
|
1456
1443
|
StringValuePtr(rewrite));
|
1457
1444
|
|
1458
|
-
return
|
1459
|
-
pattern);
|
1445
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
|
1460
1446
|
}
|
1461
1447
|
}
|
1462
1448
|
|
@@ -1472,11 +1458,12 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
|
|
1472
1458
|
*/
|
1473
1459
|
static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
|
1474
1460
|
UNUSED(self);
|
1475
|
-
string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
|
1461
|
+
std::string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
|
1462
|
+
|
1476
1463
|
return rb_str_new(quoted_string.data(), quoted_string.size());
|
1477
1464
|
}
|
1478
1465
|
|
1479
|
-
void re2_set_free(re2_set *self) {
|
1466
|
+
static void re2_set_free(re2_set *self) {
|
1480
1467
|
if (self->set) {
|
1481
1468
|
delete self->set;
|
1482
1469
|
}
|
@@ -1486,6 +1473,7 @@ void re2_set_free(re2_set *self) {
|
|
1486
1473
|
static VALUE re2_set_allocate(VALUE klass) {
|
1487
1474
|
re2_set *s;
|
1488
1475
|
VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
|
1476
|
+
|
1489
1477
|
return result;
|
1490
1478
|
}
|
1491
1479
|
|
@@ -1533,18 +1521,16 @@ static VALUE re2_set_allocate(VALUE klass) {
|
|
1533
1521
|
static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
1534
1522
|
VALUE anchor, options;
|
1535
1523
|
re2_set *s;
|
1536
|
-
RE2::Anchor re2_anchor;
|
1524
|
+
RE2::Anchor re2_anchor = RE2::UNANCHORED;
|
1537
1525
|
RE2::Options re2_options;
|
1538
1526
|
|
1539
1527
|
rb_scan_args(argc, argv, "02", &anchor, &options);
|
1540
1528
|
Data_Get_Struct(self, re2_set, s);
|
1541
1529
|
|
1542
1530
|
if (RTEST(options)) {
|
1543
|
-
parse_re2_options(re2_options, options);
|
1531
|
+
parse_re2_options(&re2_options, options);
|
1544
1532
|
}
|
1545
|
-
if (NIL_P(anchor)) {
|
1546
|
-
re2_anchor = RE2::UNANCHORED;
|
1547
|
-
} else {
|
1533
|
+
if (!NIL_P(anchor)) {
|
1548
1534
|
Check_Type(anchor, T_SYMBOL);
|
1549
1535
|
ID id_anchor = SYM2ID(anchor);
|
1550
1536
|
if (id_anchor == id_unanchored) {
|
@@ -1558,7 +1544,7 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1558
1544
|
}
|
1559
1545
|
}
|
1560
1546
|
|
1561
|
-
s->set = new(nothrow) RE2::Set(re2_options, re2_anchor);
|
1547
|
+
s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
|
1562
1548
|
if (s->set == 0) {
|
1563
1549
|
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
|
1564
1550
|
}
|
@@ -1579,11 +1565,12 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1579
1565
|
* set.add("def") #=> 1
|
1580
1566
|
*/
|
1581
1567
|
static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
1582
|
-
|
1568
|
+
StringValue(pattern);
|
1583
1569
|
re2::StringPiece regex(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
1584
1570
|
std::string err;
|
1585
1571
|
re2_set *s;
|
1586
1572
|
Data_Get_Struct(self, re2_set, s);
|
1573
|
+
|
1587
1574
|
int index = s->set->Add(regex, &err);
|
1588
1575
|
if (index < 0) {
|
1589
1576
|
rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", err.c_str());
|
@@ -1665,25 +1652,27 @@ static VALUE re2_set_match_raises_errors_p(VALUE self) {
|
|
1665
1652
|
* set.compile
|
1666
1653
|
* set.match("abcdef", :exception => true) # => [0, 1]
|
1667
1654
|
*/
|
1668
|
-
static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
1669
|
-
VALUE str, options
|
1655
|
+
static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
1656
|
+
VALUE str, options;
|
1670
1657
|
bool raise_exception = true;
|
1671
1658
|
rb_scan_args(argc, argv, "11", &str, &options);
|
1672
|
-
|
1659
|
+
|
1660
|
+
StringValue(str);
|
1673
1661
|
re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
|
1674
|
-
std::vector<int> v;
|
1675
1662
|
re2_set *s;
|
1676
1663
|
Data_Get_Struct(self, re2_set, s);
|
1677
1664
|
|
1678
1665
|
if (RTEST(options)) {
|
1679
1666
|
Check_Type(options, T_HASH);
|
1680
1667
|
|
1681
|
-
exception_option = rb_hash_aref(options, ID2SYM(id_exception));
|
1668
|
+
VALUE exception_option = rb_hash_aref(options, ID2SYM(id_exception));
|
1682
1669
|
if (!NIL_P(exception_option)) {
|
1683
1670
|
raise_exception = RTEST(exception_option);
|
1684
1671
|
}
|
1685
1672
|
}
|
1686
1673
|
|
1674
|
+
std::vector<int> v;
|
1675
|
+
|
1687
1676
|
if (raise_exception) {
|
1688
1677
|
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
1689
1678
|
RE2::Set::ErrorInfo e;
|
@@ -1704,7 +1693,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
|
1704
1693
|
rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
|
1705
1694
|
}
|
1706
1695
|
} else {
|
1707
|
-
for (
|
1696
|
+
for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
|
1708
1697
|
rb_ary_push(result, INT2FIX(v[i]));
|
1709
1698
|
}
|
1710
1699
|
}
|
@@ -1718,7 +1707,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
|
1718
1707
|
VALUE result = rb_ary_new2(v.size());
|
1719
1708
|
|
1720
1709
|
if (matched) {
|
1721
|
-
for (
|
1710
|
+
for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
|
1722
1711
|
rb_ary_push(result, INT2FIX(v[i]));
|
1723
1712
|
}
|
1724
1713
|
}
|
@@ -1868,7 +1857,7 @@ void Init_re2(void) {
|
|
1868
1857
|
rb_define_singleton_method(re2_cRegexp, "compile",
|
1869
1858
|
RUBY_METHOD_FUNC(rb_class_new_instance), -1);
|
1870
1859
|
|
1871
|
-
|
1860
|
+
rb_define_module_function(rb_mKernel, "RE2", RUBY_METHOD_FUNC(re2_re2), -1);
|
1872
1861
|
|
1873
1862
|
/* Create the symbols used in options. */
|
1874
1863
|
id_utf8 = rb_intern("utf8");
|