re2 1.7.0 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +2 -0
- data/Gemfile +5 -0
- data/LICENSE-DEPENDENCIES.txt +237 -0
- data/LICENSE.txt +1 -1
- data/README.md +79 -19
- data/Rakefile +125 -3
- data/dependencies.yml +9 -0
- data/ext/re2/extconf.rb +371 -57
- data/ext/re2/re2.cc +365 -353
- data/ext/re2/recipes.rb +43 -0
- data/lib/re2/version.rb +5 -0
- data/lib/re2.rb +8 -1
- data/ports/archives/20230125.3.tar.gz +0 -0
- data/ports/archives/re2-2023-09-01.tar.gz +0 -0
- data/re2.gemspec +44 -0
- data/spec/kernel_spec.rb +3 -3
- data/spec/re2/match_data_spec.rb +24 -0
- data/spec/re2/regexp_spec.rb +6 -0
- data/spec/re2/scanner_spec.rb +76 -22
- data/spec/re2/set_spec.rb +55 -2
- data/spec/re2/string_spec.rb +7 -3
- data/spec/re2_spec.rb +104 -10
- data/spec/spec_helper.rb +10 -0
- metadata +50 -11
data/ext/re2/re2.cc
CHANGED
@@ -6,76 +6,19 @@
|
|
6
6
|
* Released under the BSD Licence, please see LICENSE.txt
|
7
7
|
*/
|
8
8
|
|
9
|
-
#include <ruby.h>
|
10
|
-
#include <re2/re2.h>
|
11
|
-
#include <re2/set.h>
|
12
9
|
#include <stdint.h>
|
13
|
-
|
10
|
+
|
11
|
+
#include <map>
|
14
12
|
#include <sstream>
|
13
|
+
#include <string>
|
15
14
|
#include <vector>
|
16
|
-
using std::string;
|
17
|
-
using std::ostringstream;
|
18
|
-
using std::nothrow;
|
19
|
-
using std::map;
|
20
|
-
using std::vector;
|
21
|
-
|
22
|
-
#define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
|
23
|
-
#define UNUSED(x) ((void)x)
|
24
15
|
|
25
|
-
#
|
26
|
-
|
27
|
-
#
|
28
|
-
|
29
|
-
#ifndef RSTRING_PTR
|
30
|
-
#define RSTRING_PTR(x) (RSTRING(x)->ptr)
|
31
|
-
#endif
|
32
|
-
|
33
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
34
|
-
#include <ruby/encoding.h>
|
35
|
-
#define ENCODED_STR_NEW(str, length, encoding) \
|
36
|
-
({ \
|
37
|
-
VALUE _string = rb_str_new(str, length); \
|
38
|
-
int _enc = rb_enc_find_index(encoding); \
|
39
|
-
rb_enc_associate_index(_string, _enc); \
|
40
|
-
_string; \
|
41
|
-
})
|
42
|
-
#define ENCODED_STR_NEW2(str, length, str2) \
|
43
|
-
({ \
|
44
|
-
VALUE _string = rb_str_new(str, length); \
|
45
|
-
int _enc = rb_enc_get_index(str2); \
|
46
|
-
rb_enc_associate_index(_string, _enc); \
|
47
|
-
_string; \
|
48
|
-
})
|
49
|
-
#else
|
50
|
-
#define ENCODED_STR_NEW(str, length, encoding) \
|
51
|
-
rb_str_new((const char *)str, (long)length)
|
52
|
-
#define ENCODED_STR_NEW2(str, length, str2) \
|
53
|
-
rb_str_new((const char *)str, (long)length)
|
54
|
-
#endif
|
55
|
-
|
56
|
-
#ifdef HAVE_RB_STR_SUBLEN
|
57
|
-
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
58
|
-
LONG2NUM(rb_str_sublen(str, offset))
|
59
|
-
#else
|
60
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
61
|
-
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
62
|
-
({ \
|
63
|
-
VALUE _string = ENCODED_STR_NEW(RSTRING_PTR(str), offset, encoding); \
|
64
|
-
rb_str_length(_string); \
|
65
|
-
})
|
66
|
-
#else
|
67
|
-
#define ENCODED_STR_SUBLEN(str, offset, encoding) \
|
68
|
-
LONG2NUM(offset)
|
69
|
-
#endif
|
70
|
-
#endif
|
16
|
+
#include <re2/re2.h>
|
17
|
+
#include <re2/set.h>
|
18
|
+
#include <ruby.h>
|
19
|
+
#include <ruby/encoding.h>
|
71
20
|
|
72
|
-
#
|
73
|
-
#define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
|
74
|
-
(pattern->Match(text, startpos, endpos, anchor, match, nmatch))
|
75
|
-
#else
|
76
|
-
#define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
|
77
|
-
(pattern->Match(text, startpos, anchor, match, nmatch))
|
78
|
-
#endif
|
21
|
+
#define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
|
79
22
|
|
80
23
|
typedef struct {
|
81
24
|
RE2 *pattern;
|
@@ -107,95 +50,103 @@ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
|
|
107
50
|
id_perl_classes, id_word_boundary, id_one_line,
|
108
51
|
id_unanchored, id_anchor_start, id_anchor_both, id_exception;
|
109
52
|
|
110
|
-
|
53
|
+
inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
|
54
|
+
if (encoding == RE2::Options::EncodingUTF8) {
|
55
|
+
return rb_utf8_str_new(str, length);
|
56
|
+
}
|
57
|
+
|
58
|
+
VALUE string = rb_str_new(str, length);
|
59
|
+
rb_enc_associate_index(string, rb_enc_find_index("ISO-8859-1"));
|
60
|
+
|
61
|
+
return string;
|
62
|
+
}
|
63
|
+
|
64
|
+
static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
|
111
65
|
if (TYPE(options) != T_HASH) {
|
112
66
|
rb_raise(rb_eArgError, "options should be a hash");
|
113
67
|
}
|
114
|
-
VALUE utf8, posix_syntax, longest_match, log_errors,
|
115
|
-
max_mem, literal, never_nl, case_sensitive, perl_classes,
|
116
|
-
word_boundary, one_line;
|
117
68
|
|
118
|
-
utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
|
69
|
+
VALUE utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
|
119
70
|
if (!NIL_P(utf8)) {
|
120
|
-
re2_options
|
71
|
+
re2_options->set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
|
121
72
|
}
|
122
73
|
|
123
|
-
posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
|
74
|
+
VALUE posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
|
124
75
|
if (!NIL_P(posix_syntax)) {
|
125
|
-
re2_options
|
76
|
+
re2_options->set_posix_syntax(RTEST(posix_syntax));
|
126
77
|
}
|
127
78
|
|
128
|
-
longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
|
79
|
+
VALUE longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
|
129
80
|
if (!NIL_P(longest_match)) {
|
130
|
-
re2_options
|
81
|
+
re2_options->set_longest_match(RTEST(longest_match));
|
131
82
|
}
|
132
83
|
|
133
|
-
log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
|
84
|
+
VALUE log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
|
134
85
|
if (!NIL_P(log_errors)) {
|
135
|
-
re2_options
|
86
|
+
re2_options->set_log_errors(RTEST(log_errors));
|
136
87
|
}
|
137
88
|
|
138
|
-
max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
|
89
|
+
VALUE max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
|
139
90
|
if (!NIL_P(max_mem)) {
|
140
|
-
re2_options
|
91
|
+
re2_options->set_max_mem(NUM2INT(max_mem));
|
141
92
|
}
|
142
93
|
|
143
|
-
literal = rb_hash_aref(options, ID2SYM(id_literal));
|
94
|
+
VALUE literal = rb_hash_aref(options, ID2SYM(id_literal));
|
144
95
|
if (!NIL_P(literal)) {
|
145
|
-
re2_options
|
96
|
+
re2_options->set_literal(RTEST(literal));
|
146
97
|
}
|
147
98
|
|
148
|
-
never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
|
99
|
+
VALUE never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
|
149
100
|
if (!NIL_P(never_nl)) {
|
150
|
-
re2_options
|
101
|
+
re2_options->set_never_nl(RTEST(never_nl));
|
151
102
|
}
|
152
103
|
|
153
|
-
case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
|
104
|
+
VALUE case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
|
154
105
|
if (!NIL_P(case_sensitive)) {
|
155
|
-
re2_options
|
106
|
+
re2_options->set_case_sensitive(RTEST(case_sensitive));
|
156
107
|
}
|
157
108
|
|
158
|
-
perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
|
109
|
+
VALUE perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
|
159
110
|
if (!NIL_P(perl_classes)) {
|
160
|
-
re2_options
|
111
|
+
re2_options->set_perl_classes(RTEST(perl_classes));
|
161
112
|
}
|
162
113
|
|
163
|
-
word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
|
114
|
+
VALUE word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
|
164
115
|
if (!NIL_P(word_boundary)) {
|
165
|
-
re2_options
|
116
|
+
re2_options->set_word_boundary(RTEST(word_boundary));
|
166
117
|
}
|
167
118
|
|
168
|
-
one_line = rb_hash_aref(options, ID2SYM(id_one_line));
|
119
|
+
VALUE one_line = rb_hash_aref(options, ID2SYM(id_one_line));
|
169
120
|
if (!NIL_P(one_line)) {
|
170
|
-
re2_options
|
121
|
+
re2_options->set_one_line(RTEST(one_line));
|
171
122
|
}
|
172
123
|
}
|
173
124
|
|
174
|
-
void re2_matchdata_mark(re2_matchdata* self) {
|
125
|
+
static void re2_matchdata_mark(re2_matchdata* self) {
|
175
126
|
rb_gc_mark(self->regexp);
|
176
127
|
rb_gc_mark(self->text);
|
177
128
|
}
|
178
129
|
|
179
|
-
void re2_matchdata_free(re2_matchdata* self) {
|
130
|
+
static void re2_matchdata_free(re2_matchdata* self) {
|
180
131
|
if (self->matches) {
|
181
132
|
delete[] self->matches;
|
182
133
|
}
|
183
134
|
free(self);
|
184
135
|
}
|
185
136
|
|
186
|
-
void re2_scanner_mark(re2_scanner* self) {
|
137
|
+
static void re2_scanner_mark(re2_scanner* self) {
|
187
138
|
rb_gc_mark(self->regexp);
|
188
139
|
rb_gc_mark(self->text);
|
189
140
|
}
|
190
141
|
|
191
|
-
void re2_scanner_free(re2_scanner* self) {
|
142
|
+
static void re2_scanner_free(re2_scanner* self) {
|
192
143
|
if (self->input) {
|
193
144
|
delete self->input;
|
194
145
|
}
|
195
146
|
free(self);
|
196
147
|
}
|
197
148
|
|
198
|
-
void re2_regexp_free(re2_pattern* self) {
|
149
|
+
static void re2_regexp_free(re2_pattern* self) {
|
199
150
|
if (self->pattern) {
|
200
151
|
delete self->pattern;
|
201
152
|
}
|
@@ -204,12 +155,14 @@ void re2_regexp_free(re2_pattern* self) {
|
|
204
155
|
|
205
156
|
static VALUE re2_matchdata_allocate(VALUE klass) {
|
206
157
|
re2_matchdata *m;
|
158
|
+
|
207
159
|
return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
|
208
160
|
re2_matchdata_free, m);
|
209
161
|
}
|
210
162
|
|
211
163
|
static VALUE re2_scanner_allocate(VALUE klass) {
|
212
164
|
re2_scanner *c;
|
165
|
+
|
213
166
|
return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
|
214
167
|
re2_scanner_free, c);
|
215
168
|
}
|
@@ -222,7 +175,7 @@ static VALUE re2_scanner_allocate(VALUE klass) {
|
|
222
175
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
223
176
|
* m.string #=> "bob 123"
|
224
177
|
*/
|
225
|
-
static VALUE re2_matchdata_string(VALUE self) {
|
178
|
+
static VALUE re2_matchdata_string(const VALUE self) {
|
226
179
|
re2_matchdata *m;
|
227
180
|
Data_Get_Struct(self, re2_matchdata, m);
|
228
181
|
|
@@ -237,7 +190,7 @@ static VALUE re2_matchdata_string(VALUE self) {
|
|
237
190
|
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
238
191
|
* c.string #=> "foo"
|
239
192
|
*/
|
240
|
-
static VALUE re2_scanner_string(VALUE self) {
|
193
|
+
static VALUE re2_scanner_string(const VALUE self) {
|
241
194
|
re2_scanner *c;
|
242
195
|
Data_Get_Struct(self, re2_scanner, c);
|
243
196
|
|
@@ -252,7 +205,7 @@ static VALUE re2_scanner_string(VALUE self) {
|
|
252
205
|
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
253
206
|
* c.eof? #=> true
|
254
207
|
*/
|
255
|
-
static VALUE re2_scanner_eof(VALUE self) {
|
208
|
+
static VALUE re2_scanner_eof(const VALUE self) {
|
256
209
|
re2_scanner *c;
|
257
210
|
Data_Get_Struct(self, re2_scanner, c);
|
258
211
|
|
@@ -274,7 +227,8 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
274
227
|
re2_scanner *c;
|
275
228
|
Data_Get_Struct(self, re2_scanner, c);
|
276
229
|
|
277
|
-
c->input
|
230
|
+
delete c->input;
|
231
|
+
c->input = new(std::nothrow) re2::StringPiece(RSTRING_PTR(c->text));
|
278
232
|
c->eof = false;
|
279
233
|
|
280
234
|
return self;
|
@@ -284,6 +238,10 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
284
238
|
* Scan the given text incrementally for matches, returning an array of
|
285
239
|
* matches on each subsequent call. Returns nil if no matches are found.
|
286
240
|
*
|
241
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
242
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
243
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
244
|
+
*
|
287
245
|
* @return [Array<String>] the matches.
|
288
246
|
* @example
|
289
247
|
* s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
@@ -291,45 +249,41 @@ static VALUE re2_scanner_rewind(VALUE self) {
|
|
291
249
|
* s.scan #=> ["bar"]
|
292
250
|
*/
|
293
251
|
static VALUE re2_scanner_scan(VALUE self) {
|
294
|
-
int i;
|
295
|
-
size_t original_input_size, new_input_size;
|
296
|
-
bool input_advanced;
|
297
252
|
re2_pattern *p;
|
298
253
|
re2_scanner *c;
|
299
|
-
VALUE result;
|
300
254
|
|
301
255
|
Data_Get_Struct(self, re2_scanner, c);
|
302
256
|
Data_Get_Struct(c->regexp, re2_pattern, p);
|
303
257
|
|
304
|
-
vector<RE2::Arg> argv(c->number_of_capturing_groups);
|
305
|
-
vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
306
|
-
vector<string> matches(c->number_of_capturing_groups);
|
258
|
+
std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
|
259
|
+
std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
|
260
|
+
std::vector<std::string> matches(c->number_of_capturing_groups);
|
307
261
|
|
308
262
|
if (c->eof) {
|
309
263
|
return Qnil;
|
310
264
|
}
|
311
265
|
|
312
|
-
original_input_size = c->input->size();
|
266
|
+
re2::StringPiece::size_type original_input_size = c->input->size();
|
313
267
|
|
314
|
-
for (i = 0; i < c->number_of_capturing_groups; i
|
315
|
-
matches[i] = "";
|
268
|
+
for (int i = 0; i < c->number_of_capturing_groups; ++i) {
|
316
269
|
argv[i] = &matches[i];
|
317
270
|
args[i] = &argv[i];
|
318
271
|
}
|
319
272
|
|
320
|
-
if (RE2::FindAndConsumeN(c->input, *p->pattern,
|
273
|
+
if (RE2::FindAndConsumeN(c->input, *p->pattern, args.data(),
|
321
274
|
c->number_of_capturing_groups)) {
|
322
|
-
|
323
|
-
|
324
|
-
|
275
|
+
re2::StringPiece::size_type new_input_size = c->input->size();
|
276
|
+
bool input_advanced = new_input_size < original_input_size;
|
277
|
+
|
278
|
+
VALUE result = rb_ary_new2(c->number_of_capturing_groups);
|
325
279
|
|
326
|
-
for (i = 0; i < c->number_of_capturing_groups; i
|
280
|
+
for (int i = 0; i < c->number_of_capturing_groups; ++i) {
|
327
281
|
if (matches[i].empty()) {
|
328
282
|
rb_ary_push(result, Qnil);
|
329
283
|
} else {
|
330
|
-
rb_ary_push(result,
|
284
|
+
rb_ary_push(result, encoded_str_new(matches[i].data(),
|
331
285
|
matches[i].size(),
|
332
|
-
p->pattern->options().encoding()
|
286
|
+
p->pattern->options().encoding()));
|
333
287
|
}
|
334
288
|
}
|
335
289
|
|
@@ -340,47 +294,41 @@ static VALUE re2_scanner_scan(VALUE self) {
|
|
340
294
|
if (!input_advanced && new_input_size > 0) {
|
341
295
|
c->input->remove_prefix(1);
|
342
296
|
}
|
297
|
+
|
298
|
+
return result;
|
343
299
|
} else {
|
344
|
-
|
300
|
+
return Qnil;
|
345
301
|
}
|
346
|
-
|
347
|
-
return result;
|
348
302
|
}
|
349
303
|
|
350
304
|
/*
|
351
305
|
* Retrieve a matchdata by index or name.
|
352
306
|
*/
|
353
|
-
re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
|
354
|
-
int id;
|
307
|
+
static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
|
355
308
|
re2_matchdata *m;
|
356
309
|
re2_pattern *p;
|
357
|
-
map<string, int> groups;
|
358
|
-
string name;
|
359
|
-
re2::StringPiece *match;
|
360
310
|
|
361
311
|
Data_Get_Struct(self, re2_matchdata, m);
|
362
312
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
363
313
|
|
314
|
+
int id;
|
315
|
+
|
364
316
|
if (FIXNUM_P(idx)) {
|
365
317
|
id = FIX2INT(idx);
|
366
318
|
} else {
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
name = StringValuePtr(idx);
|
371
|
-
}
|
372
|
-
|
373
|
-
groups = p->pattern->NamedCapturingGroups();
|
319
|
+
const char *name = SYMBOL_P(idx) ? rb_id2name(SYM2ID(idx)) : StringValuePtr(idx);
|
320
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
321
|
+
std::map<std::string, int>::const_iterator search = groups.find(name);
|
374
322
|
|
375
|
-
if (groups.
|
376
|
-
id =
|
323
|
+
if (search != groups.end()) {
|
324
|
+
id = search->second;
|
377
325
|
} else {
|
378
326
|
return NULL;
|
379
327
|
}
|
380
328
|
}
|
381
329
|
|
382
330
|
if (id >= 0 && id < m->number_of_matches) {
|
383
|
-
match = &m->matches[id];
|
331
|
+
re2::StringPiece *match = &m->matches[id];
|
384
332
|
|
385
333
|
if (!match->empty()) {
|
386
334
|
return match;
|
@@ -399,7 +347,7 @@ re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
|
|
399
347
|
* m.size #=> 2
|
400
348
|
* m.length #=> 2
|
401
349
|
*/
|
402
|
-
static VALUE re2_matchdata_size(VALUE self) {
|
350
|
+
static VALUE re2_matchdata_size(const VALUE self) {
|
403
351
|
re2_matchdata *m;
|
404
352
|
Data_Get_Struct(self, re2_matchdata, m);
|
405
353
|
|
@@ -416,23 +364,18 @@ static VALUE re2_matchdata_size(VALUE self) {
|
|
416
364
|
* m.begin(0) #=> 1
|
417
365
|
* m.begin(1) #=> 4
|
418
366
|
*/
|
419
|
-
static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
|
367
|
+
static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
|
420
368
|
re2_matchdata *m;
|
421
|
-
re2_pattern *p;
|
422
|
-
re2::StringPiece *match;
|
423
|
-
long offset;
|
424
369
|
|
425
370
|
Data_Get_Struct(self, re2_matchdata, m);
|
426
|
-
Data_Get_Struct(m->regexp, re2_pattern, p);
|
427
371
|
|
428
|
-
match = re2_matchdata_find_match(n, self);
|
372
|
+
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
429
373
|
if (match == NULL) {
|
430
374
|
return Qnil;
|
431
375
|
} else {
|
432
|
-
offset =
|
376
|
+
long offset = match->data() - RSTRING_PTR(m->text);
|
433
377
|
|
434
|
-
return
|
435
|
-
p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
|
378
|
+
return LONG2NUM(rb_str_sublen(m->text, offset));
|
436
379
|
}
|
437
380
|
}
|
438
381
|
|
@@ -446,24 +389,18 @@ static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
|
|
446
389
|
* m.end(0) #=> 9
|
447
390
|
* m.end(1) #=> 7
|
448
391
|
*/
|
449
|
-
static VALUE re2_matchdata_end(VALUE self, VALUE n) {
|
392
|
+
static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
|
450
393
|
re2_matchdata *m;
|
451
|
-
re2_pattern *p;
|
452
|
-
re2::StringPiece *match;
|
453
|
-
long offset;
|
454
394
|
|
455
395
|
Data_Get_Struct(self, re2_matchdata, m);
|
456
|
-
Data_Get_Struct(m->regexp, re2_pattern, p);
|
457
|
-
|
458
|
-
match = re2_matchdata_find_match(n, self);
|
459
396
|
|
397
|
+
re2::StringPiece *match = re2_matchdata_find_match(n, self);
|
460
398
|
if (match == NULL) {
|
461
399
|
return Qnil;
|
462
400
|
} else {
|
463
|
-
offset =
|
401
|
+
long offset = (match->data() - RSTRING_PTR(m->text)) + match->size();
|
464
402
|
|
465
|
-
return
|
466
|
-
p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
|
403
|
+
return LONG2NUM(rb_str_sublen(m->text, offset));
|
467
404
|
}
|
468
405
|
}
|
469
406
|
|
@@ -475,9 +412,10 @@ static VALUE re2_matchdata_end(VALUE self, VALUE n) {
|
|
475
412
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
476
413
|
* m.regexp #=> #<RE2::Regexp /(\d+)/>
|
477
414
|
*/
|
478
|
-
static VALUE re2_matchdata_regexp(VALUE self) {
|
415
|
+
static VALUE re2_matchdata_regexp(const VALUE self) {
|
479
416
|
re2_matchdata *m;
|
480
417
|
Data_Get_Struct(self, re2_matchdata, m);
|
418
|
+
|
481
419
|
return m->regexp;
|
482
420
|
}
|
483
421
|
|
@@ -489,7 +427,7 @@ static VALUE re2_matchdata_regexp(VALUE self) {
|
|
489
427
|
* c = RE2::Regexp.new('(\d+)').scan("bob 123")
|
490
428
|
* c.regexp #=> #<RE2::Regexp /(\d+)/>
|
491
429
|
*/
|
492
|
-
static VALUE re2_scanner_regexp(VALUE self) {
|
430
|
+
static VALUE re2_scanner_regexp(const VALUE self) {
|
493
431
|
re2_scanner *c;
|
494
432
|
Data_Get_Struct(self, re2_scanner, c);
|
495
433
|
|
@@ -498,46 +436,47 @@ static VALUE re2_scanner_regexp(VALUE self) {
|
|
498
436
|
|
499
437
|
static VALUE re2_regexp_allocate(VALUE klass) {
|
500
438
|
re2_pattern *p;
|
439
|
+
|
501
440
|
return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
|
502
441
|
}
|
503
442
|
|
504
443
|
/*
|
505
444
|
* Returns the array of matches.
|
506
445
|
*
|
446
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
447
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
448
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
449
|
+
*
|
507
450
|
* @return [Array<String, nil>] the array of matches
|
508
451
|
* @example
|
509
452
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
510
453
|
* m.to_a #=> ["123", "123"]
|
511
454
|
*/
|
512
|
-
static VALUE re2_matchdata_to_a(VALUE self) {
|
513
|
-
int i;
|
455
|
+
static VALUE re2_matchdata_to_a(const VALUE self) {
|
514
456
|
re2_matchdata *m;
|
515
457
|
re2_pattern *p;
|
516
|
-
re2::StringPiece *match;
|
517
|
-
VALUE array;
|
518
458
|
|
519
459
|
Data_Get_Struct(self, re2_matchdata, m);
|
520
460
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
521
461
|
|
522
|
-
array = rb_ary_new2(m->number_of_matches);
|
523
|
-
for (i = 0; i < m->number_of_matches; i
|
524
|
-
match = &m->matches[i];
|
462
|
+
VALUE array = rb_ary_new2(m->number_of_matches);
|
463
|
+
for (int i = 0; i < m->number_of_matches; ++i) {
|
464
|
+
re2::StringPiece *match = &m->matches[i];
|
525
465
|
|
526
466
|
if (match->empty()) {
|
527
467
|
rb_ary_push(array, Qnil);
|
528
468
|
} else {
|
529
|
-
rb_ary_push(array,
|
530
|
-
p->pattern->options().encoding()
|
469
|
+
rb_ary_push(array, encoded_str_new(match->data(), match->size(),
|
470
|
+
p->pattern->options().encoding()));
|
531
471
|
}
|
532
472
|
}
|
533
473
|
|
534
474
|
return array;
|
535
475
|
}
|
536
476
|
|
537
|
-
static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
|
477
|
+
static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
|
538
478
|
re2_matchdata *m;
|
539
479
|
re2_pattern *p;
|
540
|
-
re2::StringPiece *match;
|
541
480
|
|
542
481
|
Data_Get_Struct(self, re2_matchdata, m);
|
543
482
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
@@ -545,32 +484,29 @@ static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
|
|
545
484
|
if (nth < 0 || nth >= m->number_of_matches) {
|
546
485
|
return Qnil;
|
547
486
|
} else {
|
548
|
-
match = &m->matches[nth];
|
487
|
+
re2::StringPiece *match = &m->matches[nth];
|
549
488
|
|
550
489
|
if (match->empty()) {
|
551
490
|
return Qnil;
|
552
491
|
} else {
|
553
|
-
return
|
554
|
-
p->pattern->options().encoding()
|
492
|
+
return encoded_str_new(match->data(), match->size(),
|
493
|
+
p->pattern->options().encoding());
|
555
494
|
}
|
556
495
|
}
|
557
496
|
}
|
558
497
|
|
559
|
-
static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
|
560
|
-
int idx;
|
498
|
+
static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
|
561
499
|
re2_matchdata *m;
|
562
500
|
re2_pattern *p;
|
563
|
-
map<string, int> groups;
|
564
|
-
string name_as_string(name);
|
565
501
|
|
566
502
|
Data_Get_Struct(self, re2_matchdata, m);
|
567
503
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
568
504
|
|
569
|
-
groups = p->pattern->NamedCapturingGroups();
|
505
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
506
|
+
std::map<std::string, int>::const_iterator search = groups.find(name);
|
570
507
|
|
571
|
-
if (groups.
|
572
|
-
|
573
|
-
return re2_matchdata_nth_match(idx, self);
|
508
|
+
if (search != groups.end()) {
|
509
|
+
return re2_matchdata_nth_match(search->second, self);
|
574
510
|
} else {
|
575
511
|
return Qnil;
|
576
512
|
}
|
@@ -579,6 +515,10 @@ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
|
|
579
515
|
/*
|
580
516
|
* Retrieve zero, one or more matches by index or name.
|
581
517
|
*
|
518
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
519
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
520
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
521
|
+
*
|
582
522
|
* @return [Array<String, nil>, String, Boolean]
|
583
523
|
*
|
584
524
|
* @overload [](index)
|
@@ -619,12 +559,12 @@ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
|
|
619
559
|
* m["number"] #=> "123"
|
620
560
|
* m[:number] #=> "123"
|
621
561
|
*/
|
622
|
-
static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
|
562
|
+
static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
|
623
563
|
VALUE idx, rest;
|
624
564
|
rb_scan_args(argc, argv, "11", &idx, &rest);
|
625
565
|
|
626
566
|
if (TYPE(idx) == T_STRING) {
|
627
|
-
return re2_matchdata_named_match(
|
567
|
+
return re2_matchdata_named_match(RSTRING_PTR(idx), self);
|
628
568
|
} else if (SYMBOL_P(idx)) {
|
629
569
|
return re2_matchdata_named_match(rb_id2name(SYM2ID(idx)), self);
|
630
570
|
} else if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) {
|
@@ -639,57 +579,61 @@ static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
|
|
639
579
|
*
|
640
580
|
* @return [String] the entire matched string
|
641
581
|
*/
|
642
|
-
static VALUE re2_matchdata_to_s(VALUE self) {
|
582
|
+
static VALUE re2_matchdata_to_s(const VALUE self) {
|
643
583
|
return re2_matchdata_nth_match(0, self);
|
644
584
|
}
|
645
585
|
|
646
586
|
/*
|
647
587
|
* Returns a printable version of the match.
|
648
588
|
*
|
589
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
590
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
591
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
592
|
+
*
|
649
593
|
* @return [String] a printable version of the match
|
650
594
|
* @example
|
651
595
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
652
596
|
* m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
|
653
597
|
*/
|
654
|
-
static VALUE re2_matchdata_inspect(VALUE self) {
|
655
|
-
int i;
|
598
|
+
static VALUE re2_matchdata_inspect(const VALUE self) {
|
656
599
|
re2_matchdata *m;
|
657
600
|
re2_pattern *p;
|
658
|
-
VALUE match, result;
|
659
|
-
ostringstream output;
|
660
601
|
|
661
602
|
Data_Get_Struct(self, re2_matchdata, m);
|
662
603
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
663
604
|
|
605
|
+
std::ostringstream output;
|
664
606
|
output << "#<RE2::MatchData";
|
665
607
|
|
666
|
-
for (i = 0; i < m->number_of_matches; i
|
608
|
+
for (int i = 0; i < m->number_of_matches; ++i) {
|
667
609
|
output << " ";
|
668
610
|
|
669
611
|
if (i > 0) {
|
670
612
|
output << i << ":";
|
671
613
|
}
|
672
614
|
|
673
|
-
match = re2_matchdata_nth_match(i, self);
|
615
|
+
VALUE match = re2_matchdata_nth_match(i, self);
|
674
616
|
|
675
617
|
if (match == Qnil) {
|
676
618
|
output << "nil";
|
677
619
|
} else {
|
678
|
-
output << "\"" <<
|
620
|
+
output << "\"" << RSTRING_PTR(match) << "\"";
|
679
621
|
}
|
680
622
|
}
|
681
623
|
|
682
624
|
output << ">";
|
683
625
|
|
684
|
-
|
685
|
-
p->pattern->options().encoding()
|
686
|
-
|
687
|
-
return result;
|
626
|
+
return encoded_str_new(output.str().data(), output.str().length(),
|
627
|
+
p->pattern->options().encoding());
|
688
628
|
}
|
689
629
|
|
690
630
|
/*
|
691
631
|
* Returns the array of submatches for pattern matching.
|
692
632
|
*
|
633
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
634
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
635
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
636
|
+
*
|
693
637
|
* @return [Array<String, nil>] the array of submatches
|
694
638
|
* @example
|
695
639
|
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
@@ -703,25 +647,22 @@ static VALUE re2_matchdata_inspect(VALUE self) {
|
|
703
647
|
* puts "Unrecognised match"
|
704
648
|
* end
|
705
649
|
*/
|
706
|
-
static VALUE re2_matchdata_deconstruct(VALUE self) {
|
707
|
-
int i;
|
650
|
+
static VALUE re2_matchdata_deconstruct(const VALUE self) {
|
708
651
|
re2_matchdata *m;
|
709
652
|
re2_pattern *p;
|
710
|
-
re2::StringPiece *match;
|
711
|
-
VALUE array;
|
712
653
|
|
713
654
|
Data_Get_Struct(self, re2_matchdata, m);
|
714
655
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
715
656
|
|
716
|
-
array = rb_ary_new2(m->number_of_matches - 1);
|
717
|
-
for (i = 1; i < m->number_of_matches; i
|
718
|
-
match = &m->matches[i];
|
657
|
+
VALUE array = rb_ary_new2(m->number_of_matches - 1);
|
658
|
+
for (int i = 1; i < m->number_of_matches; ++i) {
|
659
|
+
re2::StringPiece *match = &m->matches[i];
|
719
660
|
|
720
661
|
if (match->empty()) {
|
721
662
|
rb_ary_push(array, Qnil);
|
722
663
|
} else {
|
723
|
-
rb_ary_push(array,
|
724
|
-
p->pattern->options().encoding()
|
664
|
+
rb_ary_push(array, encoded_str_new(match->data(), match->size(),
|
665
|
+
p->pattern->options().encoding()));
|
725
666
|
}
|
726
667
|
}
|
727
668
|
|
@@ -735,6 +676,10 @@ static VALUE re2_matchdata_deconstruct(VALUE self) {
|
|
735
676
|
* more keys than there are capturing groups. Given keys will populate the hash in
|
736
677
|
* order but an invalid name will cause the hash to be immediately returned.
|
737
678
|
*
|
679
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
680
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
681
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
682
|
+
*
|
738
683
|
* @return [Hash] a hash of capturing group names to submatches
|
739
684
|
* @param [Array<Symbol>, nil] keys an array of Symbol capturing group names or nil to return all names
|
740
685
|
* @example
|
@@ -752,40 +697,37 @@ static VALUE re2_matchdata_deconstruct(VALUE self) {
|
|
752
697
|
* puts "Unrecognised match"
|
753
698
|
* end
|
754
699
|
*/
|
755
|
-
static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
|
756
|
-
int i;
|
757
|
-
VALUE capturing_groups, key;
|
700
|
+
static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys) {
|
758
701
|
re2_matchdata *m;
|
759
702
|
re2_pattern *p;
|
760
|
-
map<string, int> groups;
|
761
|
-
map<string, int>::iterator iterator;
|
762
703
|
|
763
704
|
Data_Get_Struct(self, re2_matchdata, m);
|
764
705
|
Data_Get_Struct(m->regexp, re2_pattern, p);
|
765
706
|
|
766
|
-
groups = p->pattern->NamedCapturingGroups();
|
767
|
-
capturing_groups = rb_hash_new();
|
707
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
708
|
+
VALUE capturing_groups = rb_hash_new();
|
768
709
|
|
769
710
|
if (NIL_P(keys)) {
|
770
|
-
for (
|
711
|
+
for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
|
771
712
|
rb_hash_aset(capturing_groups,
|
772
|
-
ID2SYM(rb_intern(
|
773
|
-
re2_matchdata_nth_match(
|
713
|
+
ID2SYM(rb_intern(it->first.data())),
|
714
|
+
re2_matchdata_nth_match(it->second, self));
|
774
715
|
}
|
775
716
|
} else {
|
776
717
|
Check_Type(keys, T_ARRAY);
|
777
718
|
|
778
719
|
if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
|
779
|
-
for (i = 0; i < RARRAY_LEN(keys); i
|
780
|
-
key = rb_ary_entry(keys, i);
|
720
|
+
for (int i = 0; i < RARRAY_LEN(keys); ++i) {
|
721
|
+
VALUE key = rb_ary_entry(keys, i);
|
781
722
|
Check_Type(key, T_SYMBOL);
|
782
|
-
|
723
|
+
const char *name = rb_id2name(SYM2ID(key));
|
724
|
+
std::map<std::string, int>::const_iterator search = groups.find(name);
|
783
725
|
|
784
|
-
if (groups.
|
726
|
+
if (search != groups.end()) {
|
727
|
+
rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(search->second, self));
|
728
|
+
} else {
|
785
729
|
break;
|
786
730
|
}
|
787
|
-
|
788
|
-
rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(groups[name], self));
|
789
731
|
}
|
790
732
|
}
|
791
733
|
}
|
@@ -800,8 +742,7 @@ static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
|
|
800
742
|
* @see RE2::Regexp#initialize
|
801
743
|
*
|
802
744
|
*/
|
803
|
-
static VALUE re2_re2(int argc, VALUE *argv, VALUE
|
804
|
-
UNUSED(self);
|
745
|
+
static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
|
805
746
|
return rb_class_new_instance(argc, argv, re2_cRegexp);
|
806
747
|
}
|
807
748
|
|
@@ -845,15 +786,19 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
845
786
|
re2_pattern *p;
|
846
787
|
|
847
788
|
rb_scan_args(argc, argv, "11", &pattern, &options);
|
789
|
+
|
790
|
+
/* Ensure pattern is a string. */
|
791
|
+
StringValue(pattern);
|
792
|
+
|
848
793
|
Data_Get_Struct(self, re2_pattern, p);
|
849
794
|
|
850
795
|
if (RTEST(options)) {
|
851
796
|
RE2::Options re2_options;
|
852
|
-
parse_re2_options(re2_options, options);
|
797
|
+
parse_re2_options(&re2_options, options);
|
853
798
|
|
854
|
-
p->pattern = new(nothrow) RE2(
|
799
|
+
p->pattern = new(std::nothrow) RE2(RSTRING_PTR(pattern), re2_options);
|
855
800
|
} else {
|
856
|
-
p->pattern = new(nothrow) RE2(
|
801
|
+
p->pattern = new(std::nothrow) RE2(RSTRING_PTR(pattern));
|
857
802
|
}
|
858
803
|
|
859
804
|
if (p->pattern == 0) {
|
@@ -866,40 +811,47 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
|
|
866
811
|
/*
|
867
812
|
* Returns a printable version of the regular expression +re2+.
|
868
813
|
*
|
814
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
815
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
816
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
817
|
+
*
|
869
818
|
* @return [String] a printable version of the regular expression
|
870
819
|
* @example
|
871
820
|
* re2 = RE2::Regexp.new("woo?")
|
872
821
|
* re2.inspect #=> "#<RE2::Regexp /woo?/>"
|
873
822
|
*/
|
874
|
-
static VALUE re2_regexp_inspect(VALUE self) {
|
823
|
+
static VALUE re2_regexp_inspect(const VALUE self) {
|
875
824
|
re2_pattern *p;
|
876
|
-
VALUE result;
|
877
|
-
ostringstream output;
|
878
825
|
|
879
826
|
Data_Get_Struct(self, re2_pattern, p);
|
880
827
|
|
881
|
-
|
828
|
+
std::ostringstream output;
|
882
829
|
|
883
|
-
|
884
|
-
p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
|
830
|
+
output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
|
885
831
|
|
886
|
-
return
|
832
|
+
return encoded_str_new(output.str().data(), output.str().length(),
|
833
|
+
p->pattern->options().encoding());
|
887
834
|
}
|
888
835
|
|
889
836
|
/*
|
890
837
|
* Returns a string version of the regular expression +re2+.
|
891
838
|
*
|
839
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
840
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
841
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
842
|
+
*
|
892
843
|
* @return [String] a string version of the regular expression
|
893
844
|
* @example
|
894
845
|
* re2 = RE2::Regexp.new("woo?")
|
895
846
|
* re2.to_s #=> "woo?"
|
896
847
|
*/
|
897
|
-
static VALUE re2_regexp_to_s(VALUE self) {
|
848
|
+
static VALUE re2_regexp_to_s(const VALUE self) {
|
898
849
|
re2_pattern *p;
|
899
850
|
Data_Get_Struct(self, re2_pattern, p);
|
900
|
-
|
851
|
+
|
852
|
+
return encoded_str_new(p->pattern->pattern().data(),
|
901
853
|
p->pattern->pattern().size(),
|
902
|
-
p->pattern->options().encoding()
|
854
|
+
p->pattern->options().encoding());
|
903
855
|
}
|
904
856
|
|
905
857
|
/*
|
@@ -911,9 +863,10 @@ static VALUE re2_regexp_to_s(VALUE self) {
|
|
911
863
|
* re2 = RE2::Regexp.new("woo?")
|
912
864
|
* re2.ok? #=> true
|
913
865
|
*/
|
914
|
-
static VALUE re2_regexp_ok(VALUE self) {
|
866
|
+
static VALUE re2_regexp_ok(const VALUE self) {
|
915
867
|
re2_pattern *p;
|
916
868
|
Data_Get_Struct(self, re2_pattern, p);
|
869
|
+
|
917
870
|
return BOOL2RUBY(p->pattern->ok());
|
918
871
|
}
|
919
872
|
|
@@ -926,9 +879,10 @@ static VALUE re2_regexp_ok(VALUE self) {
|
|
926
879
|
* re2 = RE2::Regexp.new("woo?", :utf8 => true)
|
927
880
|
* re2.utf8? #=> true
|
928
881
|
*/
|
929
|
-
static VALUE re2_regexp_utf8(VALUE self) {
|
882
|
+
static VALUE re2_regexp_utf8(const VALUE self) {
|
930
883
|
re2_pattern *p;
|
931
884
|
Data_Get_Struct(self, re2_pattern, p);
|
885
|
+
|
932
886
|
return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
|
933
887
|
}
|
934
888
|
|
@@ -941,9 +895,10 @@ static VALUE re2_regexp_utf8(VALUE self) {
|
|
941
895
|
* re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
|
942
896
|
* re2.posix_syntax? #=> true
|
943
897
|
*/
|
944
|
-
static VALUE re2_regexp_posix_syntax(VALUE self) {
|
898
|
+
static VALUE re2_regexp_posix_syntax(const VALUE self) {
|
945
899
|
re2_pattern *p;
|
946
900
|
Data_Get_Struct(self, re2_pattern, p);
|
901
|
+
|
947
902
|
return BOOL2RUBY(p->pattern->options().posix_syntax());
|
948
903
|
}
|
949
904
|
|
@@ -956,9 +911,10 @@ static VALUE re2_regexp_posix_syntax(VALUE self) {
|
|
956
911
|
* re2 = RE2::Regexp.new("woo?", :longest_match => true)
|
957
912
|
* re2.longest_match? #=> true
|
958
913
|
*/
|
959
|
-
static VALUE re2_regexp_longest_match(VALUE self) {
|
914
|
+
static VALUE re2_regexp_longest_match(const VALUE self) {
|
960
915
|
re2_pattern *p;
|
961
916
|
Data_Get_Struct(self, re2_pattern, p);
|
917
|
+
|
962
918
|
return BOOL2RUBY(p->pattern->options().longest_match());
|
963
919
|
}
|
964
920
|
|
@@ -971,9 +927,10 @@ static VALUE re2_regexp_longest_match(VALUE self) {
|
|
971
927
|
* re2 = RE2::Regexp.new("woo?", :log_errors => true)
|
972
928
|
* re2.log_errors? #=> true
|
973
929
|
*/
|
974
|
-
static VALUE re2_regexp_log_errors(VALUE self) {
|
930
|
+
static VALUE re2_regexp_log_errors(const VALUE self) {
|
975
931
|
re2_pattern *p;
|
976
932
|
Data_Get_Struct(self, re2_pattern, p);
|
933
|
+
|
977
934
|
return BOOL2RUBY(p->pattern->options().log_errors());
|
978
935
|
}
|
979
936
|
|
@@ -986,9 +943,10 @@ static VALUE re2_regexp_log_errors(VALUE self) {
|
|
986
943
|
* re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
|
987
944
|
* re2.max_mem #=> 1024
|
988
945
|
*/
|
989
|
-
static VALUE re2_regexp_max_mem(VALUE self) {
|
946
|
+
static VALUE re2_regexp_max_mem(const VALUE self) {
|
990
947
|
re2_pattern *p;
|
991
948
|
Data_Get_Struct(self, re2_pattern, p);
|
949
|
+
|
992
950
|
return INT2FIX(p->pattern->options().max_mem());
|
993
951
|
}
|
994
952
|
|
@@ -1001,9 +959,10 @@ static VALUE re2_regexp_max_mem(VALUE self) {
|
|
1001
959
|
* re2 = RE2::Regexp.new("woo?", :literal => true)
|
1002
960
|
* re2.literal? #=> true
|
1003
961
|
*/
|
1004
|
-
static VALUE re2_regexp_literal(VALUE self) {
|
962
|
+
static VALUE re2_regexp_literal(const VALUE self) {
|
1005
963
|
re2_pattern *p;
|
1006
964
|
Data_Get_Struct(self, re2_pattern, p);
|
965
|
+
|
1007
966
|
return BOOL2RUBY(p->pattern->options().literal());
|
1008
967
|
}
|
1009
968
|
|
@@ -1016,9 +975,10 @@ static VALUE re2_regexp_literal(VALUE self) {
|
|
1016
975
|
* re2 = RE2::Regexp.new("woo?", :never_nl => true)
|
1017
976
|
* re2.never_nl? #=> true
|
1018
977
|
*/
|
1019
|
-
static VALUE re2_regexp_never_nl(VALUE self) {
|
978
|
+
static VALUE re2_regexp_never_nl(const VALUE self) {
|
1020
979
|
re2_pattern *p;
|
1021
980
|
Data_Get_Struct(self, re2_pattern, p);
|
981
|
+
|
1022
982
|
return BOOL2RUBY(p->pattern->options().never_nl());
|
1023
983
|
}
|
1024
984
|
|
@@ -1031,9 +991,10 @@ static VALUE re2_regexp_never_nl(VALUE self) {
|
|
1031
991
|
* re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
|
1032
992
|
* re2.case_sensitive? #=> true
|
1033
993
|
*/
|
1034
|
-
static VALUE re2_regexp_case_sensitive(VALUE self) {
|
994
|
+
static VALUE re2_regexp_case_sensitive(const VALUE self) {
|
1035
995
|
re2_pattern *p;
|
1036
996
|
Data_Get_Struct(self, re2_pattern, p);
|
997
|
+
|
1037
998
|
return BOOL2RUBY(p->pattern->options().case_sensitive());
|
1038
999
|
}
|
1039
1000
|
|
@@ -1047,7 +1008,7 @@ static VALUE re2_regexp_case_sensitive(VALUE self) {
|
|
1047
1008
|
* re2.case_insensitive? #=> false
|
1048
1009
|
* re2.casefold? #=> false
|
1049
1010
|
*/
|
1050
|
-
static VALUE re2_regexp_case_insensitive(VALUE self) {
|
1011
|
+
static VALUE re2_regexp_case_insensitive(const VALUE self) {
|
1051
1012
|
return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
|
1052
1013
|
}
|
1053
1014
|
|
@@ -1060,9 +1021,10 @@ static VALUE re2_regexp_case_insensitive(VALUE self) {
|
|
1060
1021
|
* re2 = RE2::Regexp.new("woo?", :perl_classes => true)
|
1061
1022
|
* re2.perl_classes? #=> true
|
1062
1023
|
*/
|
1063
|
-
static VALUE re2_regexp_perl_classes(VALUE self) {
|
1024
|
+
static VALUE re2_regexp_perl_classes(const VALUE self) {
|
1064
1025
|
re2_pattern *p;
|
1065
1026
|
Data_Get_Struct(self, re2_pattern, p);
|
1027
|
+
|
1066
1028
|
return BOOL2RUBY(p->pattern->options().perl_classes());
|
1067
1029
|
}
|
1068
1030
|
|
@@ -1075,9 +1037,10 @@ static VALUE re2_regexp_perl_classes(VALUE self) {
|
|
1075
1037
|
* re2 = RE2::Regexp.new("woo?", :word_boundary => true)
|
1076
1038
|
* re2.word_boundary? #=> true
|
1077
1039
|
*/
|
1078
|
-
static VALUE re2_regexp_word_boundary(VALUE self) {
|
1040
|
+
static VALUE re2_regexp_word_boundary(const VALUE self) {
|
1079
1041
|
re2_pattern *p;
|
1080
1042
|
Data_Get_Struct(self, re2_pattern, p);
|
1043
|
+
|
1081
1044
|
return BOOL2RUBY(p->pattern->options().word_boundary());
|
1082
1045
|
}
|
1083
1046
|
|
@@ -1090,9 +1053,10 @@ static VALUE re2_regexp_word_boundary(VALUE self) {
|
|
1090
1053
|
* re2 = RE2::Regexp.new("woo?", :one_line => true)
|
1091
1054
|
* re2.one_line? #=> true
|
1092
1055
|
*/
|
1093
|
-
static VALUE re2_regexp_one_line(VALUE self) {
|
1056
|
+
static VALUE re2_regexp_one_line(const VALUE self) {
|
1094
1057
|
re2_pattern *p;
|
1095
1058
|
Data_Get_Struct(self, re2_pattern, p);
|
1059
|
+
|
1096
1060
|
return BOOL2RUBY(p->pattern->options().one_line());
|
1097
1061
|
}
|
1098
1062
|
|
@@ -1102,9 +1066,10 @@ static VALUE re2_regexp_one_line(VALUE self) {
|
|
1102
1066
|
*
|
1103
1067
|
* @return [String, nil] the error string or nil
|
1104
1068
|
*/
|
1105
|
-
static VALUE re2_regexp_error(VALUE self) {
|
1069
|
+
static VALUE re2_regexp_error(const VALUE self) {
|
1106
1070
|
re2_pattern *p;
|
1107
1071
|
Data_Get_Struct(self, re2_pattern, p);
|
1072
|
+
|
1108
1073
|
if (p->pattern->ok()) {
|
1109
1074
|
return Qnil;
|
1110
1075
|
} else {
|
@@ -1116,17 +1081,22 @@ static VALUE re2_regexp_error(VALUE self) {
|
|
1116
1081
|
* If the RE2 could not be created properly, returns
|
1117
1082
|
* the offending portion of the regexp otherwise returns nil.
|
1118
1083
|
*
|
1084
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1085
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1086
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1087
|
+
*
|
1119
1088
|
* @return [String, nil] the offending portion of the regexp or nil
|
1120
1089
|
*/
|
1121
|
-
static VALUE re2_regexp_error_arg(VALUE self) {
|
1090
|
+
static VALUE re2_regexp_error_arg(const VALUE self) {
|
1122
1091
|
re2_pattern *p;
|
1123
1092
|
Data_Get_Struct(self, re2_pattern, p);
|
1093
|
+
|
1124
1094
|
if (p->pattern->ok()) {
|
1125
1095
|
return Qnil;
|
1126
1096
|
} else {
|
1127
|
-
return
|
1097
|
+
return encoded_str_new(p->pattern->error_arg().data(),
|
1128
1098
|
p->pattern->error_arg().size(),
|
1129
|
-
p->pattern->options().encoding()
|
1099
|
+
p->pattern->options().encoding());
|
1130
1100
|
}
|
1131
1101
|
}
|
1132
1102
|
|
@@ -1137,9 +1107,10 @@ static VALUE re2_regexp_error_arg(VALUE self) {
|
|
1137
1107
|
*
|
1138
1108
|
* @return [Integer] the regexp "cost"
|
1139
1109
|
*/
|
1140
|
-
static VALUE re2_regexp_program_size(VALUE self) {
|
1110
|
+
static VALUE re2_regexp_program_size(const VALUE self) {
|
1141
1111
|
re2_pattern *p;
|
1142
1112
|
Data_Get_Struct(self, re2_pattern, p);
|
1113
|
+
|
1143
1114
|
return INT2FIX(p->pattern->ProgramSize());
|
1144
1115
|
}
|
1145
1116
|
|
@@ -1149,12 +1120,11 @@ static VALUE re2_regexp_program_size(VALUE self) {
|
|
1149
1120
|
*
|
1150
1121
|
* @return [Hash] the options
|
1151
1122
|
*/
|
1152
|
-
static VALUE re2_regexp_options(VALUE self) {
|
1153
|
-
VALUE options;
|
1123
|
+
static VALUE re2_regexp_options(const VALUE self) {
|
1154
1124
|
re2_pattern *p;
|
1155
1125
|
|
1156
1126
|
Data_Get_Struct(self, re2_pattern, p);
|
1157
|
-
options = rb_hash_new();
|
1127
|
+
VALUE options = rb_hash_new();
|
1158
1128
|
|
1159
1129
|
rb_hash_aset(options, ID2SYM(id_utf8),
|
1160
1130
|
BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8));
|
@@ -1202,33 +1172,34 @@ static VALUE re2_regexp_options(VALUE self) {
|
|
1202
1172
|
*
|
1203
1173
|
* @return [Integer] the number of capturing subpatterns
|
1204
1174
|
*/
|
1205
|
-
static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
|
1175
|
+
static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
|
1206
1176
|
re2_pattern *p;
|
1207
|
-
|
1208
1177
|
Data_Get_Struct(self, re2_pattern, p);
|
1178
|
+
|
1209
1179
|
return INT2FIX(p->pattern->NumberOfCapturingGroups());
|
1210
1180
|
}
|
1211
1181
|
|
1212
1182
|
/*
|
1213
1183
|
* Returns a hash of names to capturing indices of groups.
|
1214
1184
|
*
|
1185
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1186
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1187
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1188
|
+
*
|
1215
1189
|
* @return [Hash] a hash of names to capturing indices
|
1216
1190
|
*/
|
1217
|
-
static VALUE re2_regexp_named_capturing_groups(VALUE self) {
|
1218
|
-
VALUE capturing_groups;
|
1191
|
+
static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
|
1219
1192
|
re2_pattern *p;
|
1220
|
-
map<string, int> groups;
|
1221
|
-
map<string, int>::iterator iterator;
|
1222
1193
|
|
1223
1194
|
Data_Get_Struct(self, re2_pattern, p);
|
1224
|
-
groups = p->pattern->NamedCapturingGroups();
|
1225
|
-
capturing_groups = rb_hash_new();
|
1195
|
+
const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
|
1196
|
+
VALUE capturing_groups = rb_hash_new();
|
1226
1197
|
|
1227
|
-
for (
|
1198
|
+
for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
|
1228
1199
|
rb_hash_aset(capturing_groups,
|
1229
|
-
|
1230
|
-
p->pattern->options().encoding()
|
1231
|
-
INT2FIX(
|
1200
|
+
encoded_str_new(it->first.data(), it->first.size(),
|
1201
|
+
p->pattern->options().encoding()),
|
1202
|
+
INT2FIX(it->second));
|
1232
1203
|
}
|
1233
1204
|
|
1234
1205
|
return capturing_groups;
|
@@ -1242,16 +1213,23 @@ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
|
|
1242
1213
|
* @return [Boolean, RE2::MatchData]
|
1243
1214
|
*
|
1244
1215
|
* @overload match(text)
|
1245
|
-
* Returns an {RE2::MatchData} containing the matching
|
1246
|
-
*
|
1247
|
-
*
|
1216
|
+
* Returns an {RE2::MatchData} containing the matching pattern and all
|
1217
|
+
* subpatterns resulting from looking for the regexp in +text+ if the pattern
|
1218
|
+
* contains capturing groups.
|
1219
|
+
*
|
1220
|
+
* Returns either true or false indicating whether a successful match was
|
1221
|
+
* made if the pattern contains no capturing groups.
|
1248
1222
|
*
|
1249
1223
|
* @param [String] text the text to search
|
1250
|
-
* @return [RE2::MatchData] the
|
1224
|
+
* @return [RE2::MatchData] if the pattern contains capturing groups
|
1225
|
+
* @return [Boolean] if the pattern does not contain capturing groups
|
1251
1226
|
* @raise [NoMemoryError] if there was not enough memory to allocate the matches
|
1252
|
-
* @example
|
1227
|
+
* @example Matching with capturing groups
|
1253
1228
|
* r = RE2::Regexp.new('w(o)(o)')
|
1254
1229
|
* r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
1230
|
+
* @example Matching without capturing groups
|
1231
|
+
* r = RE2::Regexp.new('woo')
|
1232
|
+
* r.match('woo') #=> true
|
1255
1233
|
*
|
1256
1234
|
* @overload match(text, 0)
|
1257
1235
|
* Returns either true or false indicating whether a
|
@@ -1279,20 +1257,20 @@ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
|
|
1279
1257
|
* r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
|
1280
1258
|
* r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
|
1281
1259
|
*/
|
1282
|
-
static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
1283
|
-
int n;
|
1284
|
-
bool matched;
|
1260
|
+
static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
|
1285
1261
|
re2_pattern *p;
|
1286
1262
|
re2_matchdata *m;
|
1287
|
-
VALUE text, number_of_matches
|
1263
|
+
VALUE text, number_of_matches;
|
1288
1264
|
|
1289
1265
|
rb_scan_args(argc, argv, "11", &text, &number_of_matches);
|
1290
1266
|
|
1291
1267
|
/* Ensure text is a string. */
|
1292
|
-
|
1268
|
+
StringValue(text);
|
1293
1269
|
|
1294
1270
|
Data_Get_Struct(self, re2_pattern, p);
|
1295
1271
|
|
1272
|
+
int n;
|
1273
|
+
|
1296
1274
|
if (RTEST(number_of_matches)) {
|
1297
1275
|
n = NUM2INT(number_of_matches);
|
1298
1276
|
|
@@ -1308,17 +1286,21 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1308
1286
|
}
|
1309
1287
|
|
1310
1288
|
if (n == 0) {
|
1311
|
-
|
1312
|
-
|
1289
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
1290
|
+
bool matched = p->pattern->Match(RSTRING_PTR(text), 0,
|
1291
|
+
RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
|
1292
|
+
#else
|
1293
|
+
bool matched = p->pattern->Match(RSTRING_PTR(text), 0, RE2::UNANCHORED,
|
1294
|
+
0, 0);
|
1295
|
+
#endif
|
1313
1296
|
return BOOL2RUBY(matched);
|
1314
1297
|
} else {
|
1315
|
-
|
1316
1298
|
/* Because match returns the whole match as well. */
|
1317
1299
|
n += 1;
|
1318
1300
|
|
1319
|
-
matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
|
1301
|
+
VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
|
1320
1302
|
Data_Get_Struct(matchdata, re2_matchdata, m);
|
1321
|
-
m->matches = new(nothrow) re2::StringPiece[n];
|
1303
|
+
m->matches = new(std::nothrow) re2::StringPiece[n];
|
1322
1304
|
m->regexp = self;
|
1323
1305
|
m->text = rb_str_dup(text);
|
1324
1306
|
rb_str_freeze(m->text);
|
@@ -1330,10 +1312,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1330
1312
|
|
1331
1313
|
m->number_of_matches = n;
|
1332
1314
|
|
1333
|
-
|
1334
|
-
|
1335
|
-
|
1336
|
-
|
1315
|
+
#ifdef HAVE_ENDPOS_ARGUMENT
|
1316
|
+
bool matched = p->pattern->Match(RSTRING_PTR(m->text), 0,
|
1317
|
+
RSTRING_LEN(m->text), RE2::UNANCHORED, m->matches, n);
|
1318
|
+
#else
|
1319
|
+
bool matched = p->pattern->Match(RSTRING_PTR(m->text), 0,
|
1320
|
+
RE2::UNANCHORED, m->matches, n);
|
1321
|
+
#endif
|
1337
1322
|
if (matched) {
|
1338
1323
|
return matchdata;
|
1339
1324
|
} else {
|
@@ -1348,10 +1333,8 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
|
|
1348
1333
|
*
|
1349
1334
|
* @return [Boolean] whether the match was successful
|
1350
1335
|
*/
|
1351
|
-
static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
|
1352
|
-
VALUE argv[2];
|
1353
|
-
argv[0] = text;
|
1354
|
-
argv[1] = INT2FIX(0);
|
1336
|
+
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
|
1337
|
+
VALUE argv[2] = { text, INT2FIX(0) };
|
1355
1338
|
|
1356
1339
|
return re2_regexp_match(2, argv, self);
|
1357
1340
|
}
|
@@ -1362,16 +1345,18 @@ static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
|
|
1362
1345
|
* @example
|
1363
1346
|
* c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
1364
1347
|
*/
|
1365
|
-
static VALUE re2_regexp_scan(VALUE self, VALUE text) {
|
1348
|
+
static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
|
1349
|
+
/* Ensure text is a string. */
|
1350
|
+
StringValue(text);
|
1351
|
+
|
1366
1352
|
re2_pattern *p;
|
1367
1353
|
re2_scanner *c;
|
1368
|
-
VALUE scanner;
|
1369
1354
|
|
1370
1355
|
Data_Get_Struct(self, re2_pattern, p);
|
1371
|
-
scanner = rb_class_new_instance(0, 0, re2_cScanner);
|
1356
|
+
VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
|
1372
1357
|
Data_Get_Struct(scanner, re2_scanner, c);
|
1373
1358
|
|
1374
|
-
c->input = new(nothrow) re2::StringPiece(
|
1359
|
+
c->input = new(std::nothrow) re2::StringPiece(RSTRING_PTR(text));
|
1375
1360
|
c->regexp = self;
|
1376
1361
|
c->text = text;
|
1377
1362
|
|
@@ -1390,6 +1375,10 @@ static VALUE re2_regexp_scan(VALUE self, VALUE text) {
|
|
1390
1375
|
* Returns a copy of +str+ with the first occurrence +pattern+
|
1391
1376
|
* replaced with +rewrite+.
|
1392
1377
|
*
|
1378
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1379
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1380
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1381
|
+
*
|
1393
1382
|
* @param [String] str the string to modify
|
1394
1383
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1395
1384
|
* @param [String] rewrite the string to replace with
|
@@ -1399,34 +1388,42 @@ static VALUE re2_regexp_scan(VALUE self, VALUE text) {
|
|
1399
1388
|
* re2 = RE2::Regexp.new("hel+o")
|
1400
1389
|
* RE2.Replace("hello there", re2, "yo") #=> "yo there"
|
1401
1390
|
*/
|
1402
|
-
static VALUE re2_Replace(VALUE
|
1391
|
+
static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
|
1403
1392
|
VALUE rewrite) {
|
1404
|
-
|
1393
|
+
/* Ensure rewrite is a string. */
|
1394
|
+
StringValue(rewrite);
|
1395
|
+
|
1405
1396
|
re2_pattern *p;
|
1406
1397
|
|
1407
|
-
/*
|
1408
|
-
|
1398
|
+
/* Take a copy of str so it can be modified in-place by
|
1399
|
+
* RE2::Replace.
|
1400
|
+
*/
|
1401
|
+
std::string str_as_string(StringValuePtr(str));
|
1409
1402
|
|
1410
1403
|
/* Do the replacement. */
|
1411
1404
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
1412
1405
|
Data_Get_Struct(pattern, re2_pattern, p);
|
1413
|
-
RE2::Replace(&str_as_string, *p->pattern,
|
1406
|
+
RE2::Replace(&str_as_string, *p->pattern, RSTRING_PTR(rewrite));
|
1414
1407
|
|
1415
|
-
return
|
1416
|
-
p->pattern->options().encoding()
|
1408
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
1409
|
+
p->pattern->options().encoding());
|
1417
1410
|
} else {
|
1418
|
-
|
1419
|
-
|
1411
|
+
/* Ensure pattern is a string. */
|
1412
|
+
StringValue(pattern);
|
1420
1413
|
|
1421
|
-
|
1422
|
-
pattern);
|
1423
|
-
}
|
1414
|
+
RE2::Replace(&str_as_string, RSTRING_PTR(pattern), RSTRING_PTR(rewrite));
|
1424
1415
|
|
1416
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
|
1417
|
+
}
|
1425
1418
|
}
|
1426
1419
|
|
1427
1420
|
/*
|
1428
1421
|
* Return a copy of +str+ with +pattern+ replaced by +rewrite+.
|
1429
1422
|
*
|
1423
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1424
|
+
* returned in UTF-8 by default or ISO-8859-1 if the :utf8 option for the
|
1425
|
+
* RE2::Regexp is set to false (any other encoding's behaviour is undefined).
|
1426
|
+
*
|
1430
1427
|
* @param [String] str the string to modify
|
1431
1428
|
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1432
1429
|
* @param [String] rewrite the string to replace with
|
@@ -1436,27 +1433,32 @@ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
|
|
1436
1433
|
* RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
|
1437
1434
|
* RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
|
1438
1435
|
*/
|
1439
|
-
static VALUE re2_GlobalReplace(VALUE
|
1436
|
+
static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
|
1440
1437
|
VALUE rewrite) {
|
1441
|
-
|
1438
|
+
/* Ensure rewrite is a string. */
|
1439
|
+
StringValue(rewrite);
|
1442
1440
|
|
1443
|
-
/*
|
1441
|
+
/* Take a copy of str so it can be modified in-place by
|
1442
|
+
* RE2::GlobalReplace.
|
1443
|
+
*/
|
1444
1444
|
re2_pattern *p;
|
1445
|
-
string str_as_string(StringValuePtr(str));
|
1445
|
+
std::string str_as_string(StringValuePtr(str));
|
1446
1446
|
|
1447
1447
|
/* Do the replacement. */
|
1448
1448
|
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
|
1449
1449
|
Data_Get_Struct(pattern, re2_pattern, p);
|
1450
|
-
RE2::GlobalReplace(&str_as_string, *p->pattern,
|
1450
|
+
RE2::GlobalReplace(&str_as_string, *p->pattern, RSTRING_PTR(rewrite));
|
1451
1451
|
|
1452
|
-
return
|
1453
|
-
p->pattern->options().encoding()
|
1452
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(),
|
1453
|
+
p->pattern->options().encoding());
|
1454
1454
|
} else {
|
1455
|
-
|
1456
|
-
|
1455
|
+
/* Ensure pattern is a string. */
|
1456
|
+
StringValue(pattern);
|
1457
|
+
|
1458
|
+
RE2::GlobalReplace(&str_as_string, RSTRING_PTR(pattern),
|
1459
|
+
RSTRING_PTR(rewrite));
|
1457
1460
|
|
1458
|
-
return
|
1459
|
-
pattern);
|
1461
|
+
return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
|
1460
1462
|
}
|
1461
1463
|
}
|
1462
1464
|
|
@@ -1470,13 +1472,15 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
|
|
1470
1472
|
* @example
|
1471
1473
|
* RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
|
1472
1474
|
*/
|
1473
|
-
static VALUE re2_QuoteMeta(VALUE
|
1474
|
-
|
1475
|
-
|
1475
|
+
static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
|
1476
|
+
StringValue(unquoted);
|
1477
|
+
|
1478
|
+
std::string quoted_string = RE2::QuoteMeta(RSTRING_PTR(unquoted));
|
1479
|
+
|
1476
1480
|
return rb_str_new(quoted_string.data(), quoted_string.size());
|
1477
1481
|
}
|
1478
1482
|
|
1479
|
-
void re2_set_free(re2_set *self) {
|
1483
|
+
static void re2_set_free(re2_set *self) {
|
1480
1484
|
if (self->set) {
|
1481
1485
|
delete self->set;
|
1482
1486
|
}
|
@@ -1486,6 +1490,7 @@ void re2_set_free(re2_set *self) {
|
|
1486
1490
|
static VALUE re2_set_allocate(VALUE klass) {
|
1487
1491
|
re2_set *s;
|
1488
1492
|
VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
|
1493
|
+
|
1489
1494
|
return result;
|
1490
1495
|
}
|
1491
1496
|
|
@@ -1533,18 +1538,13 @@ static VALUE re2_set_allocate(VALUE klass) {
|
|
1533
1538
|
static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
1534
1539
|
VALUE anchor, options;
|
1535
1540
|
re2_set *s;
|
1536
|
-
RE2::Anchor re2_anchor;
|
1537
|
-
RE2::Options re2_options;
|
1538
1541
|
|
1539
1542
|
rb_scan_args(argc, argv, "02", &anchor, &options);
|
1540
1543
|
Data_Get_Struct(self, re2_set, s);
|
1541
1544
|
|
1542
|
-
|
1543
|
-
|
1544
|
-
|
1545
|
-
if (NIL_P(anchor)) {
|
1546
|
-
re2_anchor = RE2::UNANCHORED;
|
1547
|
-
} else {
|
1545
|
+
RE2::Anchor re2_anchor = RE2::UNANCHORED;
|
1546
|
+
|
1547
|
+
if (!NIL_P(anchor)) {
|
1548
1548
|
Check_Type(anchor, T_SYMBOL);
|
1549
1549
|
ID id_anchor = SYM2ID(anchor);
|
1550
1550
|
if (id_anchor == id_unanchored) {
|
@@ -1558,7 +1558,13 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1558
1558
|
}
|
1559
1559
|
}
|
1560
1560
|
|
1561
|
-
|
1561
|
+
RE2::Options re2_options;
|
1562
|
+
|
1563
|
+
if (RTEST(options)) {
|
1564
|
+
parse_re2_options(&re2_options, options);
|
1565
|
+
}
|
1566
|
+
|
1567
|
+
s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
|
1562
1568
|
if (s->set == 0) {
|
1563
1569
|
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
|
1564
1570
|
}
|
@@ -1579,14 +1585,25 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1579
1585
|
* set.add("def") #=> 1
|
1580
1586
|
*/
|
1581
1587
|
static VALUE re2_set_add(VALUE self, VALUE pattern) {
|
1582
|
-
|
1583
|
-
|
1584
|
-
std::string err;
|
1588
|
+
StringValue(pattern);
|
1589
|
+
|
1585
1590
|
re2_set *s;
|
1586
1591
|
Data_Get_Struct(self, re2_set, s);
|
1587
|
-
|
1592
|
+
|
1593
|
+
/* To prevent the memory of the err string leaking when we call rb_raise,
|
1594
|
+
* take a copy of it and let it go out of scope.
|
1595
|
+
*/
|
1596
|
+
char msg[100];
|
1597
|
+
int index;
|
1598
|
+
|
1599
|
+
{
|
1600
|
+
std::string err;
|
1601
|
+
index = s->set->Add(RSTRING_PTR(pattern), &err);
|
1602
|
+
strlcpy(msg, err.c_str(), sizeof(msg));
|
1603
|
+
}
|
1604
|
+
|
1588
1605
|
if (index < 0) {
|
1589
|
-
rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s",
|
1606
|
+
rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", msg);
|
1590
1607
|
}
|
1591
1608
|
|
1592
1609
|
return INT2FIX(index);
|
@@ -1616,8 +1633,7 @@ static VALUE re2_set_compile(VALUE self) {
|
|
1616
1633
|
*
|
1617
1634
|
* @return [Bool] whether the underlying re2 outputs error information from Set matches
|
1618
1635
|
*/
|
1619
|
-
static VALUE re2_set_match_raises_errors_p(VALUE
|
1620
|
-
UNUSED(self);
|
1636
|
+
static VALUE re2_set_match_raises_errors_p(VALUE) {
|
1621
1637
|
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
1622
1638
|
return Qtrue;
|
1623
1639
|
#else
|
@@ -1665,29 +1681,30 @@ static VALUE re2_set_match_raises_errors_p(VALUE self) {
|
|
1665
1681
|
* set.compile
|
1666
1682
|
* set.match("abcdef", :exception => true) # => [0, 1]
|
1667
1683
|
*/
|
1668
|
-
static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
1669
|
-
VALUE str, options
|
1684
|
+
static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
|
1685
|
+
VALUE str, options;
|
1670
1686
|
bool raise_exception = true;
|
1671
1687
|
rb_scan_args(argc, argv, "11", &str, &options);
|
1672
|
-
|
1673
|
-
|
1674
|
-
std::vector<int> v;
|
1688
|
+
|
1689
|
+
StringValue(str);
|
1675
1690
|
re2_set *s;
|
1676
1691
|
Data_Get_Struct(self, re2_set, s);
|
1677
1692
|
|
1678
1693
|
if (RTEST(options)) {
|
1679
1694
|
Check_Type(options, T_HASH);
|
1680
1695
|
|
1681
|
-
exception_option = rb_hash_aref(options, ID2SYM(id_exception));
|
1696
|
+
VALUE exception_option = rb_hash_aref(options, ID2SYM(id_exception));
|
1682
1697
|
if (!NIL_P(exception_option)) {
|
1683
1698
|
raise_exception = RTEST(exception_option);
|
1684
1699
|
}
|
1685
1700
|
}
|
1686
1701
|
|
1702
|
+
std::vector<int> v;
|
1703
|
+
|
1687
1704
|
if (raise_exception) {
|
1688
1705
|
#ifdef HAVE_ERROR_INFO_ARGUMENT
|
1689
1706
|
RE2::Set::ErrorInfo e;
|
1690
|
-
bool match_failed = !s->set->Match(
|
1707
|
+
bool match_failed = !s->set->Match(RSTRING_PTR(str), &v, &e);
|
1691
1708
|
VALUE result = rb_ary_new2(v.size());
|
1692
1709
|
|
1693
1710
|
if (match_failed) {
|
@@ -1704,7 +1721,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
|
1704
1721
|
rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
|
1705
1722
|
}
|
1706
1723
|
} else {
|
1707
|
-
for (
|
1724
|
+
for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
|
1708
1725
|
rb_ary_push(result, INT2FIX(v[i]));
|
1709
1726
|
}
|
1710
1727
|
}
|
@@ -1714,11 +1731,11 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
|
1714
1731
|
rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
|
1715
1732
|
#endif
|
1716
1733
|
} else {
|
1717
|
-
bool matched = s->set->Match(
|
1734
|
+
bool matched = s->set->Match(RSTRING_PTR(str), &v);
|
1718
1735
|
VALUE result = rb_ary_new2(v.size());
|
1719
1736
|
|
1720
1737
|
if (matched) {
|
1721
|
-
for (
|
1738
|
+
for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
|
1722
1739
|
rb_ary_push(result, INT2FIX(v[i]));
|
1723
1740
|
}
|
1724
1741
|
}
|
@@ -1727,12 +1744,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
|
|
1727
1744
|
}
|
1728
1745
|
}
|
1729
1746
|
|
1730
|
-
|
1731
|
-
* that YARD can parse it.
|
1732
|
-
*/
|
1733
|
-
extern "C" void Init_re2(void);
|
1734
|
-
|
1735
|
-
void Init_re2(void) {
|
1747
|
+
extern "C" void Init_re2(void) {
|
1736
1748
|
re2_mRE2 = rb_define_module("RE2");
|
1737
1749
|
re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
|
1738
1750
|
re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
|
@@ -1868,7 +1880,7 @@ void Init_re2(void) {
|
|
1868
1880
|
rb_define_singleton_method(re2_cRegexp, "compile",
|
1869
1881
|
RUBY_METHOD_FUNC(rb_class_new_instance), -1);
|
1870
1882
|
|
1871
|
-
|
1883
|
+
rb_define_module_function(rb_mKernel, "RE2", RUBY_METHOD_FUNC(re2_re2), -1);
|
1872
1884
|
|
1873
1885
|
/* Create the symbols used in options. */
|
1874
1886
|
id_utf8 = rb_intern("utf8");
|