re2 2.0.0 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.txt +237 -0
- data/LICENSE.txt +1 -1
- data/README.md +48 -17
- data/ext/re2/extconf.rb +0 -1
- data/ext/re2/re2.cc +309 -317
- data/lib/re2/version.rb +1 -1
- data/re2.gemspec +5 -4
- data/spec/kernel_spec.rb +3 -3
- data/spec/re2/match_data_spec.rb +24 -0
- data/spec/re2/regexp_spec.rb +6 -0
- data/spec/re2/scanner_spec.rb +76 -22
- data/spec/re2/set_spec.rb +41 -1
- data/spec/re2/string_spec.rb +7 -3
- data/spec/re2_spec.rb +104 -10
- data/spec/spec_helper.rb +10 -0
- metadata +6 -4
data/ext/re2/re2.cc
CHANGED
@@ -6,77 +6,21 @@
|
|
6
6
|
* Released under the BSD Licence, please see LICENSE.txt
|
7
7
|
*/
|
8
8
|
|
9
|
-
#include <ruby.h>
|
10
|
-
#include <re2/re2.h>
|
11
|
-
#include <re2/set.h>
|
12
9
|
#include <stdint.h>
|
13
|
-
|
10
|
+
|
11
|
+
#include <map>
|
14
12
|
#include <sstream>
|
13
|
+
#include <string>
|
15
14
|
#include <vector>
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
15
|
+
|
16
|
+
#include <re2/re2.h>
|
17
|
+
#include <re2/set.h>
|
18
|
+
#include <ruby.h>
|
19
|
+
#include <ruby/encoding.h>
|
21
20
|
|
22
21
|
#define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
|
23
22
|
#define UNUSED(x) ((void)x)
|
24
23
|
|
25
|
-
#ifndef RSTRING_LEN
|
26
|
-
#define RSTRING_LEN(x) (RSTRING(x)->len)
|
27
|
-
#endif
|
28
|
-
|
29
|
-
#ifndef RSTRING_PTR
|
30
|
-
#define RSTRING_PTR(x) (RSTRING(x)->ptr)
|
31
|
-
#endif
|
32
|
-
|
33
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
34
|
-
#include <ruby/encoding.h>
|
35
|
-
#define ENCODED_STR_NEW(str, length, encoding) \
|
36
|
-
({ \
|
37
|
-
VALUE _string = rb_str_new(str, length); \
|
38
|
-
int _enc = rb_enc_find_index(encoding); \
|
39
|
-
rb_enc_associate_index(_string, _enc); \
|
40
|
-
_string; \
|
41
|
-
})
|
42
|
-
#define ENCODED_STR_NEW2(str, length, str2) \
|
43
|
-
({ \
|
44
|
-
VALUE _string = rb_str_new(str, length); \
|
45
|
-
int _enc = rb_enc_get_index(str2); \
|
46
|
-
rb_enc_associate_index(_string, _enc); \
|
47
|
-
_string; \
|
48
|
-
})
|
49
|
-
#else
|
50
|
-
#define ENCODED_STR_NEW(str, length, encoding) \
|
51
|
-
rb_str_new((const char *)str, (long)length)
|
52
|
-
#define ENCODED_STR_NEW2(str, length, str2) \
|
53
|
-
rb_str_new((const char *)str, (long)length)
|
54
|
-
#endif
|
55
|
-
|
56
|
-
#ifdef HAVE_RB_STR_SUBLEN
|
57
|
-
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
58
|
-
LONG2NUM(rb_str_sublen(str, offset))
|
59
|
-
#else
|
60
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
61
|
-
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
62
|
-
({ \
|
63
|
-
VALUE _string = ENCODED_STR_NEW(RSTRING_PTR(str), offset, encoding); \
|
64
|
-
rb_str_length(_string); \
|
65
|
-
})
|
66
|
-
#else
|
67
|
-
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
68
|
-
LONG2NUM(offset)
|
69
|
-
#endif
|
70
|
-
#endif
|
71
|
-
|
72
|
-
#ifdef HAVE_ENDPOS_ARGUMENT
|
73
|
-
#define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
|
74
|
-
(pattern->Match(text, startpos, endpos, anchor, match, nmatch))
|
75
|
-
#else
|
76
|
-
#define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
|
77
|
-
(pattern->Match(text, startpos, anchor, match, nmatch))
|
78
|
-
#endif
|
79
|
-
|
80
24
|
typedef struct {
|
81
25
|
RE2 *pattern;
|
82
26
|
} re2_pattern;
|
@@ -107,95 +51,103 @@ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
|
107
51
|
id_perl_classes, id_word_boundary, id_one_line,
|
108
52
|
id_unanchored, id_anchor_start, id_anchor_both, id_exception;
|
109
53
|
|
110
|
-
|
54
|
+
inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
|
55
|
+
if (encoding == RE2::Options::EncodingUTF8) {
|
56
|
+
return rb_utf8_str_new(str, length);
|
57
|
+
}
|
58
|
+
|
59
|
+
VALUE string = rb_str_new(str, length);
|
60
|
+
rb_enc_associate_index(string, rb_enc_find_index("ISO-8859-1"));
|
61
|
+
|
62
|
+
return string;
|
63
|
+
}
|
64
|
+
|
65
|
+
static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
|
111
66
|
if (TYPE(options) != T_HASH) {
|
112
67
|
rb_raise(rb_eArgError, "options should be a hash");
|
113
68
|
}
|
114
|
-
VALUE utf8, posix_syntax, longest_match, log_errors,
|
115
|
-
max_mem, literal, never_nl, case_sensitive, perl_classes,
|
116
|
-
word_boundary, one_line;
|
117
69
|
|
118
|
-
utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
|
70
|
+
VALUE utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
|
119
71
|
if (!NIL_P(utf8)) {
|
120
|
-
re2_options
|
72
|
+
re2_options->set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
|
121
73
|
}
|
122
74
|
|
123
|
-
posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
|
75
|
+
VALUE posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
|
124
76
|
if (!NIL_P(posix_syntax)) {
|
125
|
-
re2_options
|
77
|
+
re2_options->set_posix_syntax(RTEST(posix_syntax));
|
126
78
|
}
|
127
79
|
|
128
|
-
longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
|
80
|
+
VALUE longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
|
129
81
|
if (!NIL_P(longest_match)) {
|
130
|
-
re2_options
|
82
|
+
re2_options->set_longest_match(RTEST(longest_match));
|
131
83
|
}
|
132
84
|
|
133
|
-
log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
|
85
|
+
VALUE log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
|
134
86
|
if (!NIL_P(log_errors)) {
|
135
|
-
re2_options
|
87
|
+
re2_options->set_log_errors(RTEST(log_errors));
|
136
88
|
}
|
137
89
|
|
138
|
-
max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
|
90
|
+
VALUE max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
|
139
91
|
if (!NIL_P(max_mem)) {
|
140
|
-
re2_options
|
92
|
+
re2_options->set_max_mem(NUM2INT(max_mem));
|
141
93
|
}
|
142
94
|
|
143
|
-
literal = rb_hash_aref(options, ID2SYM(id_literal));
|
95
|
+
VALUE literal = rb_hash_aref(options, ID2SYM(id_literal));
|
144
96
|
if (!NIL_P(literal)) {
|
145
|
-
re2_options
|
97
|
+
re2_options->set_literal(RTEST(literal));
|
146
98
|
}
|
147
99
|
|
148
|
-
never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
|
100
|
+
VALUE never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
|
149
101
|
if (!NIL_P(never_nl)) {
|
150
|
-
re2_options
|
102
|
+
re2_options->set_never_nl(RTEST(never_nl));
|
151
103
|
}
|
152
104
|
|
153
|
-
case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
|
105
|
+
VALUE case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
|
154
106
|
if (!NIL_P(case_sensitive)) {
|
155
|
-
re2_options
|
107
|
+
re2_options->set_case_sensitive(RTEST(case_sensitive));
|
156
108
|
}
|
157
109
|
|
158
|
-
perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
|
110
|
+
VALUE perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
|
159
111
|
if (!NIL_P(perl_classes)) {
|
160
|
-
re2_options
|
112
|
+
re2_options->set_perl_classes(RTEST(perl_classes));
|
161
113
|
}
|
162
114
|
|
163
|
-
word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
|
115
|
+
VALUE word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
|
164
116
|
if (!NIL_P(word_boundary)) {
|
165
|
-
re2_options
|
117
|
+
re2_options->set_word_boundary(RTEST(word_boundary));
|
166
118
|
}
|
167
119
|
|
168
|
-
one_line = rb_hash_aref(options, ID2SYM(id_one_line));
|
120
|
+
VALUE one_line = rb_hash_aref(options, ID2SYM(id_one_line));
|
169
121
|
if (!NIL_P(one_line)) {
|
170
|
-
re2_options
|
122
|
+
re2_options->set_one_line(RTEST(one_line));
|
171
123
|
}
|
172
124
|
}
|
173
125
|
|
174
|
-
void re2_matchdata_mark(re2_matchdata* self) {
|
126
|
+
static void re2_matchdata_mark(re2_matchdata* self) {
|
175
127
|
rb_gc_mark(self->regexp);
|
176
128
|
rb_gc_mark(self->text);
|
177
129
|
}
|
178
130
|
|
179
|
-
void re2_matchdata_free(re2_matchdata* self) {
|
131
|
+
static void re2_matchdata_free(re2_matchdata* self) {
|
180
132
|
if (self->matches) {
|
181
133
|
delete[] self->matches;
|
182
134
|
}
|
183
135
|
free(self);
|
184
136
|
}
|
185
137
|
|
186
|
-
void re2_scanner_mark(re2_scanner* self) {
|
138
|
+
static void re2_scanner_mark(re2_scanner* self) {
|
187
139
|
rb_gc_mark(self->regexp);
|
188
140
|
rb_gc_mark(self->text);
|
189
141
|
}
|
190
142
|
|
191
|
-
void re2_scanner_free(re2_scanner* self) {
|
143
|
+
static void re2_scanner_free(re2_scanner* self) {
|
192
144
|
if (self->input) {
|
193
145
|
delete self->input;
|
194
146
|
}
|
195
147
|
free(self);
|
196
148
|
}
|
197
149
|
|
198
|
-
void re2_regexp_free(re2_pattern* self) {
|
150
|
+
static void re2_regexp_free(re2_pattern* self) {
|
199
151
|
if (self->pattern) {
|
200
152
|
delete self->pattern;
|
201
153
|
}
|
@@ -204,12 +156,14 @@ void re2_regexp_free(re2_pattern* self) {
|
|
204
156
|
|
205
157
|
static VALUE re2_matchdata_allocate(VALUE klass) {
|
206
158
|
re2_matchdata *m;
|
159
|
+
|
207
160
|
return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
|
208
161
|
re2_matchdata_free, m);
|
209
162
|
}
|
210
163
|
|
211
164
|
static VALUE re2_scanner_allocate(VALUE klass) {
|
212
165
|
re2_scanner *c;
|
166
|
+
|
213
167
|
return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
|
214
168
|
re2_scanner_free, c);
|
215
169
|
}
|
@@ -222,7 +176,7 @@ static VALUE re2_scanner_allocate(VALUE klass) {
|
|
222
176
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
223
177
|
* m.string #=> "bob 123"
|
224
178
|
*/
|
225
|
-
static VALUE re2_matchdata_string(VALUE self) {
|
179
|
+
static VALUE re2_matchdata_string(const VALUE self) {
|
226
180
|
re2_matchdata *m;
|
227
181
|
Data_Get_Struct(self, re2_matchdata, m);
|
228
182
|
|
@@ -237,7 +191,7 @@ static VALUE re2_matchdata_string(VALUE self) {
|
|
237
191
|
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
238
192
|
* c.string #=> "foo"
|
239
193
|
*/
|
240
|
-
static VALUE re2_scanner_string(VALUE self) {
|
194
|
+
static VALUE re2_scanner_string(const VALUE self) {
|
241
195
|
re2_scanner *c;
|
242
196
|
Data_Get_Struct(self, re2_scanner, c);
|
243
197
|
|
@@ -252,7 +206,7 @@ static VALUE re2_scanner_string(VALUE self) {
|
|
252
206
|
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
253
207
|
* c.eof? #=> true
|
254
208
|
*/
|
255
|
-
static VALUE re2_scanner_eof(VALUE self) {
|
209
|
+
static VALUE re2_scanner_eof(const VALUE self) {
|
256
210
|
re2_scanner *c;
|
257
211
|
Data_Get_Struct(self, re2_scanner, c);
|
258
212
|
|
@@ -274,7 +228,7 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
274
228
|
re2_scanner *c;
|
275
229
|
Data_Get_Struct(self, re2_scanner, c);
|
276
230
|
|
277
|
-
c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text));
|
231
|
+
c->input = new(std::nothrow) re2::StringPiece(StringValuePtr(c->text));
|
278
232
|
c->eof = false;
|
279
233
|
|
280
234
|
return self;
|
@@ -284,6 +238,10 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
284
238
|
* Scan the given text incrementally for matches, returning an array of
|
285
239
|
* matches on each subsequent call. Returns nil if no matches are found.
|
286
240
|
*
|
241
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
242
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
243
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
244
|
+
*
|
287
245
|
* @return [Array<String>] the matches.
|
288
246
|
* @example
|
289
247
|
* s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
@@ -291,45 +249,41 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
291
249
|
* s.scan #=> ["bar"]
|
292
250
|
*/
|
293
251
|
static VALUE re2_scanner_scan(VALUE self) {
|
294
|
-
int i;
|
295
|
-
size_t original_input_size, new_input_size;
|
296
|
-
bool input_advanced;
|
297
252
|
re2_pattern *p;
|
298
253
|
re2_scanner *c;
|
299
|
-
VALUE result;
|
300
254
|
|
301
255
|
Data_Get_Struct(self, re2_scanner, c);
|
302
256
|
Data_Get_Struct(c->regexp, re2_pattern, p);
|
303
257
|
|
304
|
-
vector<RE2::Arg> argv(c->number_of_capturing_groups);
|
305
|
-
vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
306
|
-
vector<string> matches(c->number_of_capturing_groups);
|
258
|
+
std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
|
259
|
+
std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
260
|
+
std::vector<std::string> matches(c->number_of_capturing_groups);
|
307
261
|
|
308
262
|
if (c->eof) {
|
309
263
|
return Qnil;
|
310
264
|
}
|
311
265
|
|
312
|
-
original_input_size = c->input->size();
|
266
|
+
re2::StringPiece::size_type original_input_size = c->input->size();
|
313
267
|
|
314
|
-
for (i = 0; i < c->number_of_capturing_groups; i
|
315
|
-
matches[i] = "";
|
268
|
+
for (int i = 0; i < c->number_of_capturing_groups; ++i) {
|
316
269
|
argv[i] = &matches[i];
|
317
270
|
args[i] = &argv[i];
|
318
271
|
}
|
319
272
|
|
320
273
|
if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0],
|
321
274
|
c->number_of_capturing_groups)) {
|
322
|
-
|
323
|
-
|
324
|
-
input_advanced = new_input_size < original_input_size;
|
275
|
+
re2::StringPiece::size_type new_input_size = c->input->size();
|
276
|
+
bool input_advanced = new_input_size < original_input_size;
|
325
277
|
|
326
|
-
|
278
|
+
VALUE result = rb_ary_new2(c->number_of_capturing_groups);
|
279
|
+
|
280
|
+
for (int i = 0; i < c->number_of_capturing_groups; ++i) {
|
327
281
|
if (matches[i].empty()) {
|
328
282
|
rb_ary_push(result, Qnil);
|
329
283
|
} else {
|
330
|
-
rb_ary_push(result,
|
284
|
+
rb_ary_push(result, encoded_str_new(matches[i].data(),
|
331
285
|
matches[i].size(),
|
332
|
-
p->pattern->options().encoding()
|
286
|
+
p->pattern->options().encoding()));
|
333
287
|
}
|
334
288
|
}
|
335
289
|
|
@@ -340,47 +294,41 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
340
294
|
if (!input_advanced && new_input_size > 0) {
|
341
295
|
c->input->remove_prefix(1);
|
342
296
|
}
|
297
|
+
|
298
|
+
return result;
|
343
299
|
} else {
|
344
|
-
|
300
|
+
return Qnil;
|
345
301
|
}
|
346
|
-
|
347
|
-
return result;
|
348
302
|
}
|
349
303
|
|
350
304
|
/*
|
351
305
|
* Retrieve a matchdata by index or name.
|
352
306
|
*/
|
353
|
-
re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
|
354
|
-
int id;
|
307
|
+
static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
355
308
|
re2_matchdata *m;
|
356
309
|
re2_pattern *p;
|
357
|
-
map<string, int> groups;
|
358
|
-
string name;
|
359
|
-
re2::StringPiece *match;
|
360
310
|
|
361
311
|
Data_Get_Struct(self, re2_matchdata, m);
|
362
312
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
363
313
|
|
314
|
+
int id;
|
315
|
+
|
364
316
|
if (FIXNUM_P(idx)) {
|
365
317
|
id = FIX2INT(idx);
|
366
318
|
} else {
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
name = StringValuePtr(idx);
|
371
|
-
}
|
372
|
-
|
373
|
-
groups = p->pattern->NamedCapturingGroups();
|
319
|
+
const char *name = SYMBOL_P(idx) ? rb_id2name(SYM2ID(idx)) : StringValuePtr(idx);
|
320
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
321
|
+
std::map<std::string, int>::const_iterator search = groups.find(name);
|
374
322
|
|
375
|
-
if (groups.
|
376
|
-
id =
|
323
|
+
if (search != groups.end()) {
|
324
|
+
id = search->second;
|
377
325
|
} else {
|
378
326
|
return NULL;
|
379
327
|
}
|
380
328
|
}
|
381
329
|
|
382
330
|
if (id >= 0 && id < m->number_of_matches) {
|
383
|
-
match = &m->matches[id];
|
331
|
+
re2::StringPiece *match = &m->matches[id];
|
384
332
|
|
385
333
|
if (!match->empty()) {
|
386
334
|
return match;
|
@@ -399,7 +347,7 @@ re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
|
|
399
347
|
* m.size #=> 2
|
400
348
|
* m.length #=> 2
|
401
349
|
*/
|
402
|
-
static VALUE re2_matchdata_size(VALUE self) {
|
350
|
+
static VALUE re2_matchdata_size(const VALUE self) {
|
403
351
|
re2_matchdata *m;
|
404
352
|
Data_Get_Struct(self, re2_matchdata, m);
|
405
353
|
|
@@ -416,23 +364,18 @@ static VALUE re2_matchdata_size(VALUE self) {
|
|
416
364
|
* m.begin(0) #=> 1
|
417
365
|
* m.begin(1) #=> 4
|
418
366
|
*/
|
419
|
-
static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
|
367
|
+
static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
420
368
|
re2_matchdata *m;
|
421
|
-
re2_pattern *p;
|
422
|
-
re2::StringPiece *match;
|
423
|
-
long offset;
|
424
369
|
|
425
370
|
Data_Get_Struct(self, re2_matchdata, m);
|
426
|
-
Data_Get_Struct(m->regexp, re2_pattern, p);
|
427
371
|
|
428
|
-
match = re2_matchdata_find_match(n, self);
|
372
|
+
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
429
373
|
if (match == NULL) {
|
430
374
|
return Qnil;
|
431
375
|
} else {
|
432
|
-
offset =
|
376
|
+
long offset = match->data() - StringValuePtr(m->text);
|
433
377
|
|
434
|
-
return
|
435
|
-
p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
|
378
|
+
return LONG2NUM(rb_str_sublen(StringValue(m->text), offset));
|
436
379
|
}
|
437
380
|
}
|
438
381
|
|
@@ -446,24 +389,18 @@ static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
|
|
446
389
|
* m.end(0) #=> 9
|
447
390
|
* m.end(1) #=> 7
|
448
391
|
*/
|
449
|
-
static VALUE re2_matchdata_end(VALUE self, VALUE n) {
|
392
|
+
static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
450
393
|
re2_matchdata *m;
|
451
|
-
re2_pattern *p;
|
452
|
-
re2::StringPiece *match;
|
453
|
-
long offset;
|
454
394
|
|
455
395
|
Data_Get_Struct(self, re2_matchdata, m);
|
456
|
-
Data_Get_Struct(m->regexp, re2_pattern, p);
|
457
|
-
|
458
|
-
match = re2_matchdata_find_match(n, self);
|
459
396
|
|
397
|
+
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
460
398
|
if (match == NULL) {
|
461
399
|
return Qnil;
|
462
400
|
} else {
|
463
|
-
offset =
|
401
|
+
long offset = (match->data() - StringValuePtr(m->text)) + match->size();
|
464
402
|
|
465
|
-
return
|
466
|
-
p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
|
403
|
+
return LONG2NUM(rb_str_sublen(StringValue(m->text), offset));
|
467
404
|
}
|
468
405
|
}
|
469
406
|
|
@@ -475,9 +412,10 @@ static VALUE re2_matchdata_end(VALUE self, VALUE n) {
|
|
475
412
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
476
413
|
* m.regexp #=> #<RE2::Regexp /(\d+)/>
|
477
414
|
*/
|
478
|
-
static VALUE re2_matchdata_regexp(VALUE self) {
|
415
|
+
static VALUE re2_matchdata_regexp(const VALUE self) {
|
479
416
|
re2_matchdata *m;
|
480
417
|
Data_Get_Struct(self, re2_matchdata, m);
|
418
|
+
|
481
419
|
return m->regexp;
|
482
420
|
}
|
483
421
|
|
@@ -489,7 +427,7 @@ static VALUE re2_matchdata_regexp(VALUE self) {
|
|
489
427
|
* c = RE2::Regexp.new('(\d+)').scan("bob 123")
|
490
428
|
* c.regexp #=> #<RE2::Regexp /(\d+)/>
|
491
429
|
*/
|
492
|
-
static VALUE re2_scanner_regexp(VALUE self) {
|
430
|
+
static VALUE re2_scanner_regexp(const VALUE self) {
|
493
431
|
re2_scanner *c;
|
494
432
|
Data_Get_Struct(self, re2_scanner, c);
|
495
433
|
|
@@ -498,46 +436,47 @@ static VALUE re2_scanner_regexp(VALUE self) {
|
|
498
436
|
|
499
437
|
static VALUE re2_regexp_allocate(VALUE klass) {
|
500
438
|
re2_pattern *p;
|
439
|
+
|
501
440
|
return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
|
502
441
|
}
|
503
442
|
|
504
443
|
/*
|
505
444
|
* Returns the array of matches.
|
506
445
|
*
|
446
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
447
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
448
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
449
|
+
*
|
507
450
|
* @return [Array<String, nil>] the array of matches
|
508
451
|
* @example
|
509
452
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
510
453
|
* m.to_a #=> ["123", "123"]
|
511
454
|
*/
|
512
|
-
static VALUE re2_matchdata_to_a(VALUE self) {
|
513
|
-
int i;
|
455
|
+
static VALUE re2_matchdata_to_a(const VALUE self) {
|
514
456
|
re2_matchdata *m;
|
515
457
|
re2_pattern *p;
|
516
|
-
re2::StringPiece *match;
|
517
|
-
VALUE array;
|
518
458
|
|
519
459
|
Data_Get_Struct(self, re2_matchdata, m);
|
520
460
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
521
461
|
|
522
|
-
array = rb_ary_new2(m->number_of_matches);
|
523
|
-
for (i = 0; i < m->number_of_matches; i
|
524
|
-
match = &m->matches[i];
|
462
|
+
VALUE array = rb_ary_new2(m->number_of_matches);
|
463
|
+
for (int i = 0; i < m->number_of_matches; ++i) {
|
464
|
+
re2::StringPiece *match = &m->matches[i];
|
525
465
|
|
526
466
|
if (match->empty()) {
|
527
467
|
rb_ary_push(array, Qnil);
|
528
468
|
} else {
|
529
|
-
rb_ary_push(array,
|
530
|
-
p->pattern->options().encoding()
|
469
|
+
rb_ary_push(array, encoded_str_new(match->data(), match->size(),
|
470
|
+
p->pattern->options().encoding()));
|
531
471
|
}
|
532
472
|
}
|
533
473
|
|
534
474
|
return array;
|
535
475
|
}
|
536
476
|
|
537
|
-
static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
|
477
|
+
static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
|
538
478
|
re2_matchdata *m;
|
539
479
|
re2_pattern *p;
|
540
|
-
re2::StringPiece *match;
|
541
480
|
|
542
481
|
Data_Get_Struct(self, re2_matchdata, m);
|
543
482
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
@@ -545,32 +484,29 @@ static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
|
|
545
484
|
if (nth < 0 || nth >= m->number_of_matches) {
|
546
485
|
return Qnil;
|
547
486
|
} else {
|
548
|
-
match = &m->matches[nth];
|
487
|
+
re2::StringPiece *match = &m->matches[nth];
|
549
488
|
|
550
489
|
if (match->empty()) {
|
551
490
|
return Qnil;
|
552
491
|
} else {
|
553
|
-
return
|
554
|
-
p->pattern->options().encoding()
|
492
|
+
return encoded_str_new(match->data(), match->size(),
|
493
|
+
p->pattern->options().encoding());
|
555
494
|
}
|
556
495
|
}
|
557
496
|
}
|
558
497
|
|
559
|
-
static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
|
560
|
-
int idx;
|
498
|
+
static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
|
561
499
|
re2_matchdata *m;
|
562
500
|
re2_pattern *p;
|
563
|
-
map<string, int> groups;
|
564
|
-
string name_as_string(name);
|
565
501
|
|
566
502
|
Data_Get_Struct(self, re2_matchdata, m);
|
567
503
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
568
504
|
|
569
|
-
groups = p->pattern->NamedCapturingGroups();
|
505
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
506
|
+
std::map<std::string, int>::const_iterator search = groups.find(name);
|
570
507
|
|
571
|
-
if (groups.
|
572
|
-
|
573
|
-
return re2_matchdata_nth_match(idx, self);
|
508
|
+
if (search != groups.end()) {
|
509
|
+
return re2_matchdata_nth_match(search->second, self);
|
574
510
|
} else {
|
575
511
|
return Qnil;
|
576
512
|
}
|
@@ -579,6 +515,10 @@ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
|
|
579
515
|
/*
|
580
516
|
* Retrieve zero, one or more matches by index or name.
|
581
517
|
*
|
518
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
519
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
520
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
521
|
+
*
|
582
522
|
* @return [Array<String, nil>, String, Boolean]
|
583
523
|
*
|
584
524
|
* @overload [](index)
|
@@ -619,7 +559,7 @@ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
|
|
619
559
|
* m["number"] #=> "123"
|
620
560
|
* m[:number] #=> "123"
|
621
561
|
*/
|
622
|
-
static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
|
562
|
+
static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
|
623
563
|
VALUE idx, rest;
|
624
564
|
rb_scan_args(argc, argv, "11", &idx, &rest);
|
625
565
|
|
@@ -639,38 +579,40 @@ static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
|
|
639
579
|
*
|
640
580
|
* @return [String] the entire matched string
|
641
581
|
*/
|
642
|
-
static VALUE re2_matchdata_to_s(VALUE self) {
|
582
|
+
static VALUE re2_matchdata_to_s(const VALUE self) {
|
643
583
|
return re2_matchdata_nth_match(0, self);
|
644
584
|
}
|
645
585
|
|
646
586
|
/*
|
647
587
|
* Returns a printable version of the match.
|
648
588
|
*
|
589
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
590
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
591
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
592
|
+
*
|
649
593
|
* @return [String] a printable version of the match
|
650
594
|
* @example
|
651
595
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
652
596
|
* m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
|
653
597
|
*/
|
654
|
-
static VALUE re2_matchdata_inspect(VALUE self) {
|
655
|
-
int i;
|
598
|
+
static VALUE re2_matchdata_inspect(const VALUE self) {
|
656
599
|
re2_matchdata *m;
|
657
600
|
re2_pattern *p;
|
658
|
-
VALUE match, result;
|
659
|
-
ostringstream output;
|
660
601
|
|
661
602
|
Data_Get_Struct(self, re2_matchdata, m);
|
662
603
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
663
604
|
|
605
|
+
std::ostringstream output;
|
664
606
|
output << "#<RE2::MatchData";
|
665
607
|
|
666
|
-
for (i = 0; i < m->number_of_matches; i
|
608
|
+
for (int i = 0; i < m->number_of_matches; ++i) {
|
667
609
|
output << " ";
|
668
610
|
|
669
611
|
if (i > 0) {
|
670
612
|
output << i << ":";
|
671
613
|
}
|
672
614
|
|
673
|
-
match = re2_matchdata_nth_match(i, self);
|
615
|
+
VALUE match = re2_matchdata_nth_match(i, self);
|
674
616
|
|
675
617
|
if (match == Qnil) {
|
676
618
|
output << "nil";
|
@@ -681,15 +623,17 @@ static VALUE re2_matchdata_inspect(VALUE self) {
|
|
681
623
|
|
682
624
|
output << ">";
|
683
625
|
|
684
|
-
|
685
|
-
p->pattern->options().encoding()
|
686
|
-
|
687
|
-
return result;
|
626
|
+
return encoded_str_new(output.str().data(), output.str().length(),
|
627
|
+
p->pattern->options().encoding());
|
688
628
|
}
|
689
629
|
|
690
630
|
/*
|
691
631
|
* Returns the array of submatches for pattern matching.
|
692
632
|
*
|
633
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
634
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
635
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
636
|
+
*
|
693
637
|
* @return [Array<String, nil>] the array of submatches
|
694
638
|
* @example
|
695
639
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
@@ -703,25 +647,22 @@ static VALUE re2_matchdata_inspect(VALUE self) {
|
|
703
647
|
* puts "Unrecognised match"
|
704
648
|
* end
|
705
649
|
*/
|
706
|
-
static VALUE re2_matchdata_deconstruct(VALUE self) {
|
707
|
-
int i;
|
650
|
+
static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
708
651
|
re2_matchdata *m;
|
709
652
|
re2_pattern *p;
|
710
|
-
re2::StringPiece *match;
|
711
|
-
VALUE array;
|
712
653
|
|
713
654
|
Data_Get_Struct(self, re2_matchdata, m);
|
714
655
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
715
656
|
|
716
|
-
array = rb_ary_new2(m->number_of_matches - 1);
|
717
|
-
for (i = 1; i < m->number_of_matches; i
|
718
|
-
match = &m->matches[i];
|
657
|
+
VALUE array = rb_ary_new2(m->number_of_matches - 1);
|
658
|
+
for (int i = 1; i < m->number_of_matches; ++i) {
|
659
|
+
re2::StringPiece *match = &m->matches[i];
|
719
660
|
|
720
661
|
if (match->empty()) {
|
721
662
|
rb_ary_push(array, Qnil);
|
722
663
|
} else {
|
723
|
-
rb_ary_push(array,
|
724
|
-
p->pattern->options().encoding()
|
664
|
+
rb_ary_push(array, encoded_str_new(match->data(), match->size(),
|
665
|
+
p->pattern->options().encoding()));
|
725
666
|
}
|
726
667
|
}
|
727
668
|
|
@@ -735,6 +676,10 @@ static VALUE re2_matchdata_deconstruct(VALUE self) {
|
|
735
676
|
* more keys than there are capturing groups. Given keys will populate the hash in
|
736
677
|
* order but an invalid name will cause the hash to be immediately returned.
|
737
678
|
*
|
679
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
680
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
681
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
682
|
+
*
|
738
683
|
* @return [Hash] a hash of capturing group names to submatches
|
739
684
|
* @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
|
740
685
|
* @example
|
@@ -752,40 +697,37 @@ static VALUE re2_matchdata_deconstruct(VALUE self) {
|
|
752
697
|
* puts "Unrecognised match"
|
753
698
|
* end
|
754
699
|
*/
|
755
|
-
static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
|
756
|
-
int i;
|
757
|
-
VALUE capturing_groups, key;
|
700
|
+
static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys) {
|
758
701
|
re2_matchdata *m;
|
759
702
|
re2_pattern *p;
|
760
|
-
map<string, int> groups;
|
761
|
-
map<string, int>::iterator iterator;
|
762
703
|
|
763
704
|
Data_Get_Struct(self, re2_matchdata, m);
|
764
705
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
765
706
|
|
766
|
-
groups = p->pattern->NamedCapturingGroups();
|
767
|
-
capturing_groups = rb_hash_new();
|
707
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
708
|
+
VALUE capturing_groups = rb_hash_new();
|
768
709
|
|
769
710
|
if (NIL_P(keys)) {
|
770
|
-
for (
|
711
|
+
for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
|
771
712
|
rb_hash_aset(capturing_groups,
|
772
|
-
ID2SYM(rb_intern(
|
773
|
-
re2_matchdata_nth_match(
|
713
|
+
ID2SYM(rb_intern(it->first.data())),
|
714
|
+
re2_matchdata_nth_match(it->second, self));
|
774
715
|
}
|
775
716
|
} else {
|
776
717
|
Check_Type(keys, T_ARRAY);
|
777
718
|
|
778
719
|
if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
|
779
|
-
for (i = 0; i < RARRAY_LEN(keys); i
|
780
|
-
key = rb_ary_entry(keys, i);
|
720
|
+
for (int i = 0; i < RARRAY_LEN(keys); ++i) {
|
721
|
+
VALUE key = rb_ary_entry(keys, i);
|
781
722
|
Check_Type(key, T_SYMBOL);
|
782
|
-
|
723
|
+
const char *name = rb_id2name(SYM2ID(key));
|
724
|
+
std::map<std::string, int>::const_iterator search = groups.find(name);
|
783
725
|
|
784
|
-
if (groups.
|
726
|
+
if (search != groups.end()) {
|
727
|
+
rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(search->second, self));
|
728
|
+
} else {
|
785
729
|
break;
|
786
730
|
}
|
787
|
-
|
788
|
-
rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(groups[name], self));
|
789
731
|
}
|
790
732
|
}
|
791
733
|
}
|
@@ -802,6 +744,7 @@ static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
|
|
802
744
|
*/
|
803
745
|
static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
|
804
746
|
UNUSED(self);
|
747
|
+
|
805
748
|
return rb_class_new_instance(argc, argv, re2_cRegexp);
|
806
749
|
}
|
807
750
|
|
@@ -849,11 +792,11 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
849
792
|
|
850
793
|
if (RTEST(options)) {
|
851
794
|
RE2::Options re2_options;
|
852
|
-
parse_re2_options(re2_options, options);
|
795
|
+
parse_re2_options(&re2_options, options);
|
853
796
|
|
854
|
-
p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
|
797
|
+
p->pattern = new(std::nothrow) RE2(StringValuePtr(pattern), re2_options);
|
855
798
|
} else {
|
856
|
-
p->pattern = new(nothrow) RE2(StringValuePtr(pattern));
|
799
|
+
p->pattern = new(std::nothrow) RE2(StringValuePtr(pattern));
|
857
800
|
}
|
858
801
|
|
859
802
|
if (p->pattern == 0) {
|
@@ -866,40 +809,47 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
866
809
|
/*
|
867
810
|
* Returns a printable version of the regular expression +re2+.
|
868
811
|
*
|
812
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
813
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
814
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
815
|
+
*
|
869
816
|
* @return [String] a printable version of the regular expression
|
870
817
|
* @example
|
871
818
|
* re2 = RE2::Regexp.new("woo?")
|
872
819
|
* re2.inspect #=> "#<RE2::Regexp /woo?/>"
|
873
820
|
*/
|
874
|
-
static VALUE re2_regexp_inspect(VALUE self) {
|
821
|
+
static VALUE re2_regexp_inspect(const VALUE self) {
|
875
822
|
re2_pattern *p;
|
876
|
-
VALUE result;
|
877
|
-
ostringstream output;
|
878
823
|
|
879
824
|
Data_Get_Struct(self, re2_pattern, p);
|
880
825
|
|
881
|
-
|
826
|
+
std::ostringstream output;
|
882
827
|
|
883
|
-
|
884
|
-
p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
|
828
|
+
output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
|
885
829
|
|
886
|
-
return
|
830
|
+
return encoded_str_new(output.str().data(), output.str().length(),
|
831
|
+
p->pattern->options().encoding());
|
887
832
|
}
|
888
833
|
|
889
834
|
/*
|
890
835
|
* Returns a string version of the regular expression +re2+.
|
891
836
|
*
|
837
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
838
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
839
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
840
|
+
*
|
892
841
|
* @return [String] a string version of the regular expression
|
893
842
|
* @example
|
894
843
|
* re2 = RE2::Regexp.new("woo?")
|
895
844
|
* re2.to_s #=> "woo?"
|
896
845
|
*/
|
897
|
-
static VALUE re2_regexp_to_s(VALUE self) {
|
846
|
+
static VALUE re2_regexp_to_s(const VALUE self) {
|
898
847
|
re2_pattern *p;
|
899
848
|
Data_Get_Struct(self, re2_pattern, p);
|
900
|
-
|
849
|
+
|
850
|
+
return encoded_str_new(p->pattern->pattern().data(),
|
901
851
|
p->pattern->pattern().size(),
|
902
|
-
p->pattern->options().encoding()
|
852
|
+
p->pattern->options().encoding());
|
903
853
|
}
|
904
854
|
|
905
855
|
/*
|
@@ -911,9 +861,10 @@ static VALUE re2_regexp_to_s(VALUE self) {
|
|
911
861
|
* re2 = RE2::Regexp.new("woo?")
|
912
862
|
* re2.ok? #=> true
|
913
863
|
*/
|
914
|
-
static VALUE re2_regexp_ok(VALUE self) {
|
864
|
+
static VALUE re2_regexp_ok(const VALUE self) {
|
915
865
|
re2_pattern *p;
|
916
866
|
Data_Get_Struct(self, re2_pattern, p);
|
867
|
+
|
917
868
|
return BOOL2RUBY(p->pattern->ok());
|
918
869
|
}
|
919
870
|
|
@@ -926,9 +877,10 @@ static VALUE re2_regexp_ok(VALUE self) {
|
|
926
877
|
* re2 = RE2::Regexp.new("woo?", :utf8 => true)
|
927
878
|
* re2.utf8? #=> true
|
928
879
|
*/
|
929
|
-
static VALUE re2_regexp_utf8(VALUE self) {
|
880
|
+
static VALUE re2_regexp_utf8(const VALUE self) {
|
930
881
|
re2_pattern *p;
|
931
882
|
Data_Get_Struct(self, re2_pattern, p);
|
883
|
+
|
932
884
|
return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
|
933
885
|
}
|
934
886
|
|
@@ -941,9 +893,10 @@ static VALUE re2_regexp_utf8(VALUE self) {
|
|
941
893
|
* re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
|
942
894
|
* re2.posix_syntax? #=> true
|
943
895
|
*/
|
944
|
-
static VALUE re2_regexp_posix_syntax(VALUE self) {
|
896
|
+
static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
945
897
|
re2_pattern *p;
|
946
898
|
Data_Get_Struct(self, re2_pattern, p);
|
899
|
+
|
947
900
|
return BOOL2RUBY(p->pattern->options().posix_syntax());
|
948
901
|
}
|
949
902
|
|
@@ -956,9 +909,10 @@ static VALUE re2_regexp_posix_syntax(VALUE self) {
|
|
956
909
|
* re2 = RE2::Regexp.new("woo?", :longest_match => true)
|
957
910
|
* re2.longest_match? #=> true
|
958
911
|
*/
|
959
|
-
static VALUE re2_regexp_longest_match(VALUE self) {
|
912
|
+
static VALUE re2_regexp_longest_match(const VALUE self) {
|
960
913
|
re2_pattern *p;
|
961
914
|
Data_Get_Struct(self, re2_pattern, p);
|
915
|
+
|
962
916
|
return BOOL2RUBY(p->pattern->options().longest_match());
|
963
917
|
}
|
964
918
|
|
@@ -971,9 +925,10 @@ static VALUE re2_regexp_longest_match(VALUE self) {
|
|
971
925
|
* re2 = RE2::Regexp.new("woo?", :log_errors => true)
|
972
926
|
* re2.log_errors? #=> true
|
973
927
|
*/
|
974
|
-
static VALUE re2_regexp_log_errors(VALUE self) {
|
928
|
+
static VALUE re2_regexp_log_errors(const VALUE self) {
|
975
929
|
re2_pattern *p;
|
976
930
|
Data_Get_Struct(self, re2_pattern, p);
|
931
|
+
|
977
932
|
return BOOL2RUBY(p->pattern->options().log_errors());
|
978
933
|
}
|
979
934
|
|
@@ -986,9 +941,10 @@ static VALUE re2_regexp_log_errors(VALUE self) {
|
|
986
941
|
* re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
|
987
942
|
* re2.max_mem #=> 1024
|
988
943
|
*/
|
989
|
-
static VALUE re2_regexp_max_mem(VALUE self) {
|
944
|
+
static VALUE re2_regexp_max_mem(const VALUE self) {
|
990
945
|
re2_pattern *p;
|
991
946
|
Data_Get_Struct(self, re2_pattern, p);
|
947
|
+
|
992
948
|
return INT2FIX(p->pattern->options().max_mem());
|
993
949
|
}
|
994
950
|
|
@@ -1001,9 +957,10 @@ static VALUE re2_regexp_max_mem(VALUE self) {
|
|
1001
957
|
* re2 = RE2::Regexp.new("woo?", :literal => true)
|
1002
958
|
* re2.literal? #=> true
|
1003
959
|
*/
|
1004
|
-
static VALUE re2_regexp_literal(VALUE self) {
|
960
|
+
static VALUE re2_regexp_literal(const VALUE self) {
|
1005
961
|
re2_pattern *p;
|
1006
962
|
Data_Get_Struct(self, re2_pattern, p);
|
963
|
+
|
1007
964
|
return BOOL2RUBY(p->pattern->options().literal());
|
1008
965
|
}
|
1009
966
|
|
@@ -1016,9 +973,10 @@ static VALUE re2_regexp_literal(VALUE self) {
|
|
1016
973
|
* re2 = RE2::Regexp.new("woo?", :never_nl => true)
|
1017
974
|
* re2.never_nl? #=> true
|
1018
975
|
*/
|
1019
|
-
static VALUE re2_regexp_never_nl(VALUE self) {
|
976
|
+
static VALUE re2_regexp_never_nl(const VALUE self) {
|
1020
977
|
re2_pattern *p;
|
1021
978
|
Data_Get_Struct(self, re2_pattern, p);
|
979
|
+
|
1022
980
|
return BOOL2RUBY(p->pattern->options().never_nl());
|
1023
981
|
}
|
1024
982
|
|
@@ -1031,9 +989,10 @@ static VALUE re2_regexp_never_nl(VALUE self) {
|
|
1031
989
|
* re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
|
1032
990
|
* re2.case_sensitive? #=> true
|
1033
991
|
*/
|
1034
|
-
static VALUE re2_regexp_case_sensitive(VALUE self) {
|
992
|
+
static VALUE re2_regexp_case_sensitive(const VALUE self) {
|
1035
993
|
re2_pattern *p;
|
1036
994
|
Data_Get_Struct(self, re2_pattern, p);
|
995
|
+
|
1037
996
|
return BOOL2RUBY(p->pattern->options().case_sensitive());
|
1038
997
|
}
|
1039
998
|
|
@@ -1047,7 +1006,7 @@ static VALUE re2_regexp_case_sensitive(VALUE self) {
|
|
1047
1006
|
* re2.case_insensitive? #=> false
|
1048
1007
|
* re2.casefold? #=> false
|
1049
1008
|
*/
|
1050
|
-
static VALUE re2_regexp_case_insensitive(VALUE self) {
|
1009
|
+
static VALUE re2_regexp_case_insensitive(const VALUE self) {
|
1051
1010
|
return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
|
1052
1011
|
}
|
1053
1012
|
|
@@ -1060,9 +1019,10 @@ static VALUE re2_regexp_case_insensitive(VALUE self) {
|
|
1060
1019
|
* re2 = RE2::Regexp.new("woo?", :perl_classes => true)
|
1061
1020
|
* re2.perl_classes? #=> true
|
1062
1021
|
*/
|
1063
|
-
static VALUE re2_regexp_perl_classes(VALUE self) {
|
1022
|
+
static VALUE re2_regexp_perl_classes(const VALUE self) {
|
1064
1023
|
re2_pattern *p;
|
1065
1024
|
Data_Get_Struct(self, re2_pattern, p);
|
1025
|
+
|
1066
1026
|
return BOOL2RUBY(p->pattern->options().perl_classes());
|
1067
1027
|
}
|
1068
1028
|
|
@@ -1075,9 +1035,10 @@ static VALUE re2_regexp_perl_classes(VALUE self) {
|
|
1075
1035
|
* re2 = RE2::Regexp.new("woo?", :word_boundary => true)
|
1076
1036
|
* re2.word_boundary? #=> true
|
1077
1037
|
*/
|
1078
|
-
static VALUE re2_regexp_word_boundary(VALUE self) {
|
1038
|
+
static VALUE re2_regexp_word_boundary(const VALUE self) {
|
1079
1039
|
re2_pattern *p;
|
1080
1040
|
Data_Get_Struct(self, re2_pattern, p);
|
1041
|
+
|
1081
1042
|
return BOOL2RUBY(p->pattern->options().word_boundary());
|
1082
1043
|
}
|
1083
1044
|
|
@@ -1090,9 +1051,10 @@ static VALUE re2_regexp_word_boundary(VALUE self) {
|
|
1090
1051
|
* re2 = RE2::Regexp.new("woo?", :one_line => true)
|
1091
1052
|
* re2.one_line? #=> true
|
1092
1053
|
*/
|
1093
|
-
static VALUE re2_regexp_one_line(VALUE self) {
|
1054
|
+
static VALUE re2_regexp_one_line(const VALUE self) {
|
1094
1055
|
re2_pattern *p;
|
1095
1056
|
Data_Get_Struct(self, re2_pattern, p);
|
1057
|
+
|
1096
1058
|
return BOOL2RUBY(p->pattern->options().one_line());
|
1097
1059
|
}
|
1098
1060
|
|
@@ -1102,9 +1064,10 @@ static VALUE re2_regexp_one_line(VALUE self) {
|
|
1102
1064
|
*
|
1103
1065
|
* @return [String, nil] the error string or nil
|
1104
1066
|
*/
|
1105
|
-
static VALUE re2_regexp_error(VALUE self) {
|
1067
|
+
static VALUE re2_regexp_error(const VALUE self) {
|
1106
1068
|
re2_pattern *p;
|
1107
1069
|
Data_Get_Struct(self, re2_pattern, p);
|
1070
|
+
|
1108
1071
|
if (p->pattern->ok()) {
|
1109
1072
|
return Qnil;
|
1110
1073
|
} else {
|
@@ -1116,17 +1079,22 @@ static VALUE re2_regexp_error(VALUE self) {
|
|
1116
1079
|
* If the RE2 could not be created properly, returns
|
1117
1080
|
* the offending portion of the regexp otherwise returns nil.
|
1118
1081
|
*
|
1082
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1083
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1084
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1085
|
+
*
|
1119
1086
|
* @return [String, nil] the offending portion of the regexp or nil
|
1120
1087
|
*/
|
1121
|
-
static VALUE re2_regexp_error_arg(VALUE self) {
|
1088
|
+
static VALUE re2_regexp_error_arg(const VALUE self) {
|
1122
1089
|
re2_pattern *p;
|
1123
1090
|
Data_Get_Struct(self, re2_pattern, p);
|
1091
|
+
|
1124
1092
|
if (p->pattern->ok()) {
|
1125
1093
|
return Qnil;
|
1126
1094
|
} else {
|
1127
|
-
return
|
1095
|
+
return encoded_str_new(p->pattern->error_arg().data(),
|
1128
1096
|
p->pattern->error_arg().size(),
|
1129
|
-
p->pattern->options().encoding()
|
1097
|
+
p->pattern->options().encoding());
|
1130
1098
|
}
|
1131
1099
|
}
|
1132
1100
|
|
@@ -1137,9 +1105,10 @@ static VALUE re2_regexp_error_arg(VALUE self) {
|
|
1137
1105
|
*
|
1138
1106
|
* @return [Integer] the regexp "cost"
|
1139
1107
|
*/
|
1140
|
-
static VALUE re2_regexp_program_size(VALUE self) {
|
1108
|
+
static VALUE re2_regexp_program_size(const VALUE self) {
|
1141
1109
|
re2_pattern *p;
|
1142
1110
|
Data_Get_Struct(self, re2_pattern, p);
|
1111
|
+
|
1143
1112
|
return INT2FIX(p->pattern->ProgramSize());
|
1144
1113
|
}
|
1145
1114
|
|
@@ -1149,12 +1118,11 @@ static VALUE re2_regexp_program_size(VALUE self) {
|
|
1149
1118
|
*
|
1150
1119
|
* @return [Hash] the options
|
1151
1120
|
*/
|
1152
|
-
static VALUE re2_regexp_options(VALUE self) {
|
1153
|
-
VALUE options;
|
1121
|
+
static VALUE re2_regexp_options(const VALUE self) {
|
1154
1122
|
re2_pattern *p;
|
1155
1123
|
|
1156
1124
|
Data_Get_Struct(self, re2_pattern, p);
|
1157
|
-
options = rb_hash_new();
|
1125
|
+
VALUE options = rb_hash_new();
|
1158
1126
|
|
1159
1127
|
rb_hash_aset(options, ID2SYM(id_utf8),
|
1160
1128
|
BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8));
|
@@ -1202,33 +1170,34 @@ static VALUE re2_regexp_options(VALUE self) {
|
|
1202
1170
|
*
|
1203
1171
|
* @return [Integer] the number of capturing subpatterns
|
1204
1172
|
*/
|
1205
|
-
static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
|
1173
|
+
static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
1206
1174
|
re2_pattern *p;
|
1207
|
-
|
1208
1175
|
Data_Get_Struct(self, re2_pattern, p);
|
1176
|
+
|
1209
1177
|
return INT2FIX(p->pattern->NumberOfCapturingGroups());
|
1210
1178
|
}
|
1211
1179
|
|
1212
1180
|
/*
|
1213
1181
|
* Returns a hash of names to capturing indices of groups.
|
1214
1182
|
*
|
1183
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1184
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1185
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1186
|
+
*
|
1215
1187
|
* @return [Hash] a hash of names to capturing indices
|
1216
1188
|
*/
|
1217
|
-
static VALUE re2_regexp_named_capturing_groups(VALUE self) {
|
1218
|
-
VALUE capturing_groups;
|
1189
|
+
static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
|
1219
1190
|
re2_pattern *p;
|
1220
|
-
map<string, int> groups;
|
1221
|
-
map<string, int>::iterator iterator;
|
1222
1191
|
|
1223
1192
|
Data_Get_Struct(self, re2_pattern, p);
|
1224
|
-
groups = p->pattern->NamedCapturingGroups();
|
1225
|
-
capturing_groups = rb_hash_new();
|
1193
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
1194
|
+
VALUE capturing_groups = rb_hash_new();
|
1226
1195
|
|
1227
|
-
for (
|
1196
|
+
for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
|
1228
1197
|
rb_hash_aset(capturing_groups,
|
1229
|
-
|
1230
|
-
p->pattern->options().encoding()
|
1231
|
-
INT2FIX(
|
1198
|
+
encoded_str_new(it->first.data(), it->first.size(),
|
1199
|
+
p->pattern->options().encoding()),
|
1200
|
+
INT2FIX(it->second));
|
1232
1201
|
}
|
1233
1202
|
|
1234
1203
|
return capturing_groups;
|
@@ -1242,16 +1211,23 @@ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
|
|
1242
1211
|
* @return [Boolean, RE2::MatchData]
|
1243
1212
|
*
|
1244
1213
|
* @overload match(text)
|
1245
|
-
* Returns an {RE2::MatchData} containing the matching
|
1246
|
-
*
|
1247
|
-
*
|
1214
|
+
* Returns an {RE2::MatchData} containing the matching pattern and all
|
1215
|
+
* subpatterns resulting from looking for the regexp in +text+ if the pattern
|
1216
|
+
* contains capturing groups.
|
1217
|
+
*
|
1218
|
+
* Returns either true or false indicating whether a successful match was
|
1219
|
+
* made if the pattern contains no capturing groups.
|
1248
1220
|
*
|
1249
1221
|
* @param [String] text the text to search
|
1250
|
-
* @return [RE2::MatchData] the
|
1222
|
+
* @return [RE2::MatchData] if the pattern contains capturing groups
|
1223
|
+
* @return [Boolean] if the pattern does not contain capturing groups
|
1251
1224
|
* @raise [NoMemoryError] if there was not enough memory to allocate the matches
|
1252
|
-
* @example
|
1225
|
+
* @example Matching with capturing groups
|
1253
1226
|
* r = RE2::Regexp.new('w(o)(o)')
|
1254
1227
|
* r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
1228
|
+
* @example Matching without capturing groups
|
1229
|
+
* r = RE2::Regexp.new('woo')
|
1230
|
+
* r.match('woo') #=> true
|
1255
1231
|
*
|
1256
1232
|
* @overload match(text, 0)
|
1257
1233
|
* Returns either true or false indicating whether a
|
@@ -1279,20 +1255,20 @@ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
|
|
1279
1255
|
* r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
|
1280
1256
|
* r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
|
1281
1257
|
*/
|
1282
|
-
static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
1283
|
-
int n;
|
1284
|
-
bool matched;
|
1258
|
+
static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
1285
1259
|
re2_pattern *p;
|
1286
1260
|
re2_matchdata *m;
|
1287
|
-
VALUE text, number_of_matches
|
1261
|
+
VALUE text, number_of_matches;
|
1288
1262
|
|
1289
1263
|
rb_scan_args(argc, argv, "11", &text, &number_of_matches);
|
1290
1264
|
|
1291
1265
|
/* Ensure text is a string. */
|
1292
|
-
|
1266
|
+
StringValue(text);
|
1293
1267
|
|
1294
1268
|
Data_Get_Struct(self, re2_pattern, p);
|
1295
1269
|
|
1270
|
+
int n;
|
1271
|
+
|
1296
1272
|
if (RTEST(number_of_matches)) {
|
1297
1273
|
n = NUM2INT(number_of_matches);
|
1298
1274
|
|
@@ -1308,17 +1284,21 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1308
1284
|
}
|
1309
1285
|
|
1310
1286
|
if (n == 0) {
|
1311
|
-
|
1312
|
-
|
1287
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
1288
|
+
bool matched = p->pattern->Match(StringValuePtr(text), 0,
|
1289
|
+
RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
|
1290
|
+
#else
|
1291
|
+
bool matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED,
|
1292
|
+
0, 0);
|
1293
|
+
#endif
|
1313
1294
|
return BOOL2RUBY(matched);
|
1314
1295
|
} else {
|
1315
|
-
|
1316
1296
|
/* Because match returns the whole match as well. */
|
1317
1297
|
n += 1;
|
1318
1298
|
|
1319
|
-
matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
|
1299
|
+
VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
|
1320
1300
|
Data_Get_Struct(matchdata, re2_matchdata, m);
|
1321
|
-
m->matches = new(nothrow) re2::StringPiece[n];
|
1301
|
+
m->matches = new(std::nothrow) re2::StringPiece[n];
|
1322
1302
|
m->regexp = self;
|
1323
1303
|
m->text = rb_str_dup(text);
|
1324
1304
|
rb_str_freeze(m->text);
|
@@ -1330,10 +1310,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1330
1310
|
|
1331
1311
|
m->number_of_matches = n;
|
1332
1312
|
|
1333
|
-
|
1334
|
-
|
1335
|
-
|
1336
|
-
|
1313
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
1314
|
+
bool matched = p->pattern->Match(StringValuePtr(m->text), 0,
|
1315
|
+
RSTRING_LEN(m->text), RE2::UNANCHORED, m->matches, n);
|
1316
|
+
#else
|
1317
|
+
bool matched = p->pattern->Match(StringValuePtr(m->text), 0,
|
1318
|
+
RE2::UNANCHORED, m->matches, n);
|
1319
|
+
#endif
|
1337
1320
|
if (matched) {
|
1338
1321
|
return matchdata;
|
1339
1322
|
} else {
|
@@ -1348,10 +1331,8 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1348
1331
|
*
|
1349
1332
|
* @return [Boolean] whether the match was successful
|
1350
1333
|
*/
|
1351
|
-
static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
|
1352
|
-
VALUE argv[2];
|
1353
|
-
argv[0] = text;
|
1354
|
-
argv[1] = INT2FIX(0);
|
1334
|
+
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
1335
|
+
VALUE argv[2] = { text, INT2FIX(0) };
|
1355
1336
|
|
1356
1337
|
return re2_regexp_match(2, argv, self);
|
1357
1338
|
}
|
@@ -1362,16 +1343,15 @@ static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
|
|
1362
1343
|
* @example
|
1363
1344
|
* c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
1364
1345
|
*/
|
1365
|
-
static VALUE re2_regexp_scan(VALUE self, VALUE text) {
|
1346
|
+
static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
1366
1347
|
re2_pattern *p;
|
1367
1348
|
re2_scanner *c;
|
1368
|
-
VALUE scanner;
|
1369
1349
|
|
1370
1350
|
Data_Get_Struct(self, re2_pattern, p);
|
1371
|
-
scanner = rb_class_new_instance(0, 0, re2_cScanner);
|
1351
|
+
VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
|
1372
1352
|
Data_Get_Struct(scanner, re2_scanner, c);
|
1373
1353
|
|
1374
|
-
c->input = new(nothrow) re2::StringPiece(StringValuePtr(text));
|
1354
|
+
c->input = new(std::nothrow) re2::StringPiece(StringValuePtr(text));
|
1375
1355
|
c->regexp = self;
|
1376
1356
|
c->text = text;
|
1377
1357
|
|
@@ -1390,6 +1370,10 @@ static VALUE re2_regexp_scan(VALUE self, VALUE text) {
|
|
1390
1370
|
* Returns a copy of +str+ with the first occurrence +pattern+
|
1391
1371
|
* replaced with +rewrite+.
|
1392
1372
|
*
|
1373
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1374
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1375
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1376
|
+
*
|
1393
1377
|
* @param [String] str the string to modify
|
1394
1378
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1395
1379
|
* @param [String] rewrite the string to replace with
|
@@ -1404,29 +1388,33 @@ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
|
|
1404
1388
|
UNUSED(self);
|
1405
1389
|
re2_pattern *p;
|
1406
1390
|
|
1407
|
-
/*
|
1408
|
-
|
1391
|
+
/* Take a copy of str so it can be modified in-place by
|
1392
|
+
* RE2::Replace.
|
1393
|
+
*/
|
1394
|
+
std::string str_as_string(StringValuePtr(str));
|
1409
1395
|
|
1410
1396
|
/* Do the replacement. */
|
1411
1397
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
1412
1398
|
Data_Get_Struct(pattern, re2_pattern, p);
|
1413
1399
|
RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
|
1414
1400
|
|
1415
|
-
return
|
1416
|
-
p->pattern->options().encoding()
|
1401
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
1402
|
+
p->pattern->options().encoding());
|
1417
1403
|
} else {
|
1418
1404
|
RE2::Replace(&str_as_string, StringValuePtr(pattern),
|
1419
1405
|
StringValuePtr(rewrite));
|
1420
1406
|
|
1421
|
-
return
|
1422
|
-
pattern);
|
1407
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
|
1423
1408
|
}
|
1424
|
-
|
1425
1409
|
}
|
1426
1410
|
|
1427
1411
|
/*
|
1428
1412
|
* Return a copy of +str+ with +pattern+ replaced by +rewrite+.
|
1429
1413
|
*
|
1414
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1415
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1416
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1417
|
+
*
|
1430
1418
|
* @param [String] str the string to modify
|
1431
1419
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1432
1420
|
* @param [String] rewrite the string to replace with
|
@@ -1440,23 +1428,24 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
|
|
1440
1428
|
VALUE rewrite) {
|
1441
1429
|
UNUSED(self);
|
1442
1430
|
|
1443
|
-
/*
|
1431
|
+
/* Take a copy of str so it can be modified in-place by
|
1432
|
+
* RE2::GlobalReplace.
|
1433
|
+
*/
|
1444
1434
|
re2_pattern *p;
|
1445
|
-
string str_as_string(StringValuePtr(str));
|
1435
|
+
std::string str_as_string(StringValuePtr(str));
|
1446
1436
|
|
1447
1437
|
/* Do the replacement. */
|
1448
1438
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
1449
1439
|
Data_Get_Struct(pattern, re2_pattern, p);
|
1450
1440
|
RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
|
1451
1441
|
|
1452
|
-
return
|
1453
|
-
p->pattern->options().encoding()
|
1442
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
1443
|
+
p->pattern->options().encoding());
|
1454
1444
|
} else {
|
1455
1445
|
RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
|
1456
1446
|
StringValuePtr(rewrite));
|
1457
1447
|
|
1458
|
-
return
|
1459
|
-
pattern);
|
1448
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
|
1460
1449
|
}
|
1461
1450
|
}
|
1462
1451
|
|
@@ -1472,11 +1461,12 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
|
|
1472
1461
|
*/
|
1473
1462
|
static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
|
1474
1463
|
UNUSED(self);
|
1475
|
-
string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
|
1464
|
+
std::string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
|
1465
|
+
|
1476
1466
|
return rb_str_new(quoted_string.data(), quoted_string.size());
|
1477
1467
|
}
|
1478
1468
|
|
1479
|
-
void re2_set_free(re2_set *self) {
|
1469
|
+
static void re2_set_free(re2_set *self) {
|
1480
1470
|
if (self->set) {
|
1481
1471
|
delete self->set;
|
1482
1472
|
}
|
@@ -1486,6 +1476,7 @@ void re2_set_free(re2_set *self) {
|
|
1486
1476
|
static VALUE re2_set_allocate(VALUE klass) {
|
1487
1477
|
re2_set *s;
|
1488
1478
|
VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
|
1479
|
+
|
1489
1480
|
return result;
|
1490
1481
|
}
|
1491
1482
|
|
@@ -1533,18 +1524,16 @@ static VALUE re2_set_allocate(VALUE klass) {
|
|
1533
1524
|
static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
1534
1525
|
VALUE anchor, options;
|
1535
1526
|
re2_set *s;
|
1536
|
-
RE2::Anchor re2_anchor;
|
1527
|
+
RE2::Anchor re2_anchor = RE2::UNANCHORED;
|
1537
1528
|
RE2::Options re2_options;
|
1538
1529
|
|
1539
1530
|
rb_scan_args(argc, argv, "02", &anchor, &options);
|
1540
1531
|
Data_Get_Struct(self, re2_set, s);
|
1541
1532
|
|
1542
1533
|
if (RTEST(options)) {
|
1543
|
-
parse_re2_options(re2_options, options);
|
1534
|
+
parse_re2_options(&re2_options, options);
|
1544
1535
|
}
|
1545
|
-
if (NIL_P(anchor)) {
|
1546
|
-
re2_anchor = RE2::UNANCHORED;
|
1547
|
-
} else {
|
1536
|
+
if (!NIL_P(anchor)) {
|
1548
1537
|
Check_Type(anchor, T_SYMBOL);
|
1549
1538
|
ID id_anchor = SYM2ID(anchor);
|
1550
1539
|
if (id_anchor == id_unanchored) {
|
@@ -1558,7 +1547,7 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1558
1547
|
}
|
1559
1548
|
}
|
1560
1549
|
|
1561
|
-
s->set = new(nothrow) RE2::Set(re2_options, re2_anchor);
|
1550
|
+
s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
|
1562
1551
|
if (s->set == 0) {
|
1563
1552
|
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
|
1564
1553
|
}
|
@@ -1579,11 +1568,12 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1579
1568
|
* set.add("def") #=> 1
|
1580
1569
|
*/
|
1581
1570
|
static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
1582
|
-
|
1571
|
+
StringValue(pattern);
|
1583
1572
|
re2::StringPiece regex(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
|
1584
1573
|
std::string err;
|
1585
1574
|
re2_set *s;
|
1586
1575
|
Data_Get_Struct(self, re2_set, s);
|
1576
|
+
|
1587
1577
|
int index = s->set->Add(regex, &err);
|
1588
1578
|
if (index < 0) {
|
1589
1579
|
rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", err.c_str());
|
@@ -1665,25 +1655,27 @@ static VALUE re2_set_match_raises_errors_p(VALUE self) {
|
|
1665
1655
|
* set.compile
|
1666
1656
|
* set.match("abcdef", :exception => true) # => [0, 1]
|
1667
1657
|
*/
|
1668
|
-
static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
1669
|
-
VALUE str, options
|
1658
|
+
static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
1659
|
+
VALUE str, options;
|
1670
1660
|
bool raise_exception = true;
|
1671
1661
|
rb_scan_args(argc, argv, "11", &str, &options);
|
1672
|
-
|
1662
|
+
|
1663
|
+
StringValue(str);
|
1673
1664
|
re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
|
1674
|
-
std::vector<int> v;
|
1675
1665
|
re2_set *s;
|
1676
1666
|
Data_Get_Struct(self, re2_set, s);
|
1677
1667
|
|
1678
1668
|
if (RTEST(options)) {
|
1679
1669
|
Check_Type(options, T_HASH);
|
1680
1670
|
|
1681
|
-
exception_option = rb_hash_aref(options, ID2SYM(id_exception));
|
1671
|
+
VALUE exception_option = rb_hash_aref(options, ID2SYM(id_exception));
|
1682
1672
|
if (!NIL_P(exception_option)) {
|
1683
1673
|
raise_exception = RTEST(exception_option);
|
1684
1674
|
}
|
1685
1675
|
}
|
1686
1676
|
|
1677
|
+
std::vector<int> v;
|
1678
|
+
|
1687
1679
|
if (raise_exception) {
|
1688
1680
|
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
1689
1681
|
RE2::Set::ErrorInfo e;
|
@@ -1704,7 +1696,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
|
1704
1696
|
rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
|
1705
1697
|
}
|
1706
1698
|
} else {
|
1707
|
-
for (
|
1699
|
+
for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
|
1708
1700
|
rb_ary_push(result, INT2FIX(v[i]));
|
1709
1701
|
}
|
1710
1702
|
}
|
@@ -1718,7 +1710,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
|
1718
1710
|
VALUE result = rb_ary_new2(v.size());
|
1719
1711
|
|
1720
1712
|
if (matched) {
|
1721
|
-
for (
|
1713
|
+
for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
|
1722
1714
|
rb_ary_push(result, INT2FIX(v[i]));
|
1723
1715
|
}
|
1724
1716
|
}
|
@@ -1868,7 +1860,7 @@ void Init_re2(void) {
|
|
1868
1860
|
rb_define_singleton_method(re2_cRegexp, "compile",
|
1869
1861
|
RUBY_METHOD_FUNC(rb_class_new_instance), -1);
|
1870
1862
|
|
1871
|
-
|
1863
|
+
rb_define_module_function(rb_mKernel, "RE2", RUBY_METHOD_FUNC(re2_re2), -1);
|
1872
1864
|
|
1873
1865
|
/* Create the symbols used in options. */
|
1874
1866
|
id_utf8 = rb_intern("utf8");
|