re2 2.15.0.rc1-x86_64-linux-musl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/Gemfile +11 -0
- data/LICENSE-DEPENDENCIES.txt +237 -0
- data/LICENSE.txt +28 -0
- data/README.md +396 -0
- data/Rakefile +94 -0
- data/dependencies.yml +7 -0
- data/ext/re2/extconf.rb +332 -0
- data/ext/re2/re2.cc +2254 -0
- data/ext/re2/recipes.rb +54 -0
- data/lib/3.1/re2.so +0 -0
- data/lib/3.2/re2.so +0 -0
- data/lib/3.3/re2.so +0 -0
- data/lib/3.4/re2.so +0 -0
- data/lib/re2/regexp.rb +72 -0
- data/lib/re2/scanner.rb +26 -0
- data/lib/re2/string.rb +38 -0
- data/lib/re2/version.rb +14 -0
- data/lib/re2.rb +20 -0
- data/re2.gemspec +47 -0
- data/spec/kernel_spec.rb +37 -0
- data/spec/re2/match_data_spec.rb +411 -0
- data/spec/re2/regexp_spec.rb +911 -0
- data/spec/re2/scanner_spec.rb +275 -0
- data/spec/re2/set_spec.rb +231 -0
- data/spec/re2/string_spec.rb +62 -0
- data/spec/re2_spec.rb +201 -0
- data/spec/spec_helper.rb +31 -0
- metadata +129 -0
data/ext/re2/re2.cc
ADDED
@@ -0,0 +1,2254 @@
|
|
1
|
+
/*
|
2
|
+
* re2 (https://github.com/mudge/re2)
|
3
|
+
* Ruby bindings to RE2, a "fast, safe, thread-friendly alternative to
|
4
|
+
* backtracking regular expression engines like those used in PCRE, Perl, and
|
5
|
+
* Python".
|
6
|
+
*
|
7
|
+
* Copyright (c) 2010, Paul Mucur (https://mudge.name)
|
8
|
+
* Released under the BSD Licence, please see LICENSE.txt
|
9
|
+
*/
|
10
|
+
|
11
|
+
#include <stdint.h>
|
12
|
+
|
13
|
+
#include <map>
|
14
|
+
#include <sstream>
|
15
|
+
#include <string>
|
16
|
+
#include <vector>
|
17
|
+
|
18
|
+
#include <re2/re2.h>
|
19
|
+
#include <re2/set.h>
|
20
|
+
#include <ruby.h>
|
21
|
+
#include <ruby/encoding.h>
|
22
|
+
|
23
|
+
/* Convert a C/C++ truth value into Ruby's Qtrue/Qfalse. The argument is
 * parenthesised so an expression containing a low-precedence operator (e.g.
 * an assignment) is evaluated as a whole before the conditional is applied. */
#define BOOL2RUBY(v) ((v) ? Qtrue : Qfalse)
|
24
|
+
|
25
|
+
typedef struct {
|
26
|
+
RE2 *pattern;
|
27
|
+
} re2_pattern;
|
28
|
+
|
29
|
+
typedef struct {
|
30
|
+
re2::StringPiece *matches;
|
31
|
+
int number_of_matches;
|
32
|
+
VALUE regexp, text;
|
33
|
+
} re2_matchdata;
|
34
|
+
|
35
|
+
typedef struct {
|
36
|
+
re2::StringPiece *input;
|
37
|
+
int number_of_capturing_groups;
|
38
|
+
bool eof;
|
39
|
+
VALUE regexp, text;
|
40
|
+
} re2_scanner;
|
41
|
+
|
42
|
+
typedef struct {
|
43
|
+
RE2::Set *set;
|
44
|
+
} re2_set;
|
45
|
+
|
46
|
+
/* Global Ruby object handles shared by the functions in this file. */
VALUE re2_mRE2;
VALUE re2_cRegexp;
VALUE re2_cMatchData;
VALUE re2_cScanner;
VALUE re2_cSet;
VALUE re2_eSetMatchError;
VALUE re2_eSetUnsupportedError;
VALUE re2_eRegexpUnsupportedError;
|
48
|
+
|
49
|
+
/* Symbols used in RE2 options. */
|
50
|
+
static ID id_utf8;
static ID id_posix_syntax;
static ID id_longest_match;
static ID id_log_errors;
static ID id_max_mem;
static ID id_literal;
static ID id_never_nl;
static ID id_case_sensitive;
static ID id_perl_classes;
static ID id_word_boundary;
static ID id_one_line;
static ID id_unanchored;
static ID id_anchor;
static ID id_anchor_start;
static ID id_anchor_both;
static ID id_exception;
static ID id_submatches;
static ID id_startpos;
static ID id_endpos;
|
55
|
+
|
56
|
+
/*
 * Builds a Ruby String from `length` bytes at `str`, tagged with the Ruby
 * encoding matching the RE2 encoding: UTF-8 for EncodingUTF8, ISO-8859-1 for
 * anything else.
 */
inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
  if (encoding != RE2::Options::EncodingUTF8) {
    VALUE latin1 = rb_str_new(str, length);
    rb_enc_associate_index(latin1, rb_enc_find_index("ISO-8859-1"));

    return latin1;
  }

  return rb_utf8_str_new(str, length);
}
|
66
|
+
|
67
|
+
/*
 * Copies the recognised entries of a Ruby options Hash onto `re2_options`.
 *
 * Each option is looked up by Symbol key; keys that are absent (or whose
 * value is nil) leave the corresponding RE2 option untouched. Boolean options
 * use Ruby truthiness.
 *
 * Raises ArgumentError if `options` is not a Hash, and whatever the numeric
 * conversion raises if `:max_mem` is not convertible to an integer.
 */
static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
  if (TYPE(options) != T_HASH) {
    rb_raise(rb_eArgError, "options should be a hash");
  }

  VALUE utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
  if (!NIL_P(utf8)) {
    re2_options->set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
  }

  VALUE posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
  if (!NIL_P(posix_syntax)) {
    re2_options->set_posix_syntax(RTEST(posix_syntax));
  }

  VALUE longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
  if (!NIL_P(longest_match)) {
    re2_options->set_longest_match(RTEST(longest_match));
  }

  VALUE log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
  if (!NIL_P(log_errors)) {
    re2_options->set_log_errors(RTEST(log_errors));
  }

  VALUE max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
  if (!NIL_P(max_mem)) {
    /* RE2::Options::set_max_mem takes an int64_t, so convert with NUM2LL:
     * NUM2INT would raise RangeError for budgets of 2 GiB or more. */
    re2_options->set_max_mem(NUM2LL(max_mem));
  }

  VALUE literal = rb_hash_aref(options, ID2SYM(id_literal));
  if (!NIL_P(literal)) {
    re2_options->set_literal(RTEST(literal));
  }

  VALUE never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
  if (!NIL_P(never_nl)) {
    re2_options->set_never_nl(RTEST(never_nl));
  }

  VALUE case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
  if (!NIL_P(case_sensitive)) {
    re2_options->set_case_sensitive(RTEST(case_sensitive));
  }

  VALUE perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
  if (!NIL_P(perl_classes)) {
    re2_options->set_perl_classes(RTEST(perl_classes));
  }

  VALUE word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
  if (!NIL_P(word_boundary)) {
    re2_options->set_word_boundary(RTEST(word_boundary));
  }

  VALUE one_line = rb_hash_aref(options, ID2SYM(id_one_line));
  if (!NIL_P(one_line)) {
    re2_options->set_one_line(RTEST(one_line));
  }
}
|
127
|
+
|
128
|
+
/* For compatibility with Ruby < 2.7 */
|
129
|
+
#ifndef HAVE_RB_GC_MARK_MOVABLE
/* No movable marking available: fall back to plain marking and emit no
 * compaction callback in the data type tables. */
#define rb_gc_mark_movable(x) rb_gc_mark(x)
#define re2_compact_callback(x)
#else
/* Expands to the callback followed by a comma so it can be dropped straight
 * into an rb_data_type_t initialiser. */
#define re2_compact_callback(x) (x),
#endif
|
135
|
+
|
136
|
+
static void re2_matchdata_mark(void *ptr) {
|
137
|
+
re2_matchdata *m = reinterpret_cast<re2_matchdata *>(ptr);
|
138
|
+
rb_gc_mark_movable(m->regexp);
|
139
|
+
rb_gc_mark_movable(m->text);
|
140
|
+
}
|
141
|
+
|
142
|
+
#ifdef HAVE_RB_GC_MARK_MOVABLE
/* GC compaction callback: refreshes the stored references with their
 * current locations. */
static void re2_matchdata_compact(void *ptr) {
  re2_matchdata *data = static_cast<re2_matchdata *>(ptr);

  data->regexp = rb_gc_location(data->regexp);
  data->text = rb_gc_location(data->text);
}
#endif
|
149
|
+
|
150
|
+
static void re2_matchdata_free(void *ptr) {
|
151
|
+
re2_matchdata *m = reinterpret_cast<re2_matchdata *>(ptr);
|
152
|
+
if (m->matches) {
|
153
|
+
delete[] m->matches;
|
154
|
+
}
|
155
|
+
xfree(m);
|
156
|
+
}
|
157
|
+
|
158
|
+
static size_t re2_matchdata_memsize(const void *ptr) {
|
159
|
+
const re2_matchdata *m = reinterpret_cast<const re2_matchdata *>(ptr);
|
160
|
+
size_t size = sizeof(*m);
|
161
|
+
if (m->matches) {
|
162
|
+
size += sizeof(*m->matches) * m->number_of_matches;
|
163
|
+
}
|
164
|
+
|
165
|
+
return size;
|
166
|
+
}
|
167
|
+
|
168
|
+
/* Typed-data descriptor for RE2::MatchData objects. */
static const rb_data_type_t re2_matchdata_data_type = {
  "RE2::MatchData", /* wrap_struct_name */
  {
    re2_matchdata_mark,    /* dmark */
    re2_matchdata_free,    /* dfree */
    re2_matchdata_memsize, /* dsize */
    re2_compact_callback(re2_matchdata_compact) /* dcompact, when available */
  },
  0, /* parent */
  0, /* data */
  // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
  // macro to update VALUE references, as to trigger write barriers.
  RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
};
|
182
|
+
|
183
|
+
static void re2_scanner_mark(void *ptr) {
|
184
|
+
re2_scanner *s = reinterpret_cast<re2_scanner *>(ptr);
|
185
|
+
rb_gc_mark_movable(s->regexp);
|
186
|
+
rb_gc_mark_movable(s->text);
|
187
|
+
}
|
188
|
+
|
189
|
+
#ifdef HAVE_RB_GC_MARK_MOVABLE
/* GC compaction callback: refreshes the stored references with their
 * current locations. */
static void re2_scanner_compact(void *ptr) {
  re2_scanner *scanner = static_cast<re2_scanner *>(ptr);

  scanner->regexp = rb_gc_location(scanner->regexp);
  scanner->text = rb_gc_location(scanner->text);
}
#endif
|
196
|
+
|
197
|
+
static void re2_scanner_free(void *ptr) {
|
198
|
+
re2_scanner *s = reinterpret_cast<re2_scanner *>(ptr);
|
199
|
+
if (s->input) {
|
200
|
+
delete s->input;
|
201
|
+
}
|
202
|
+
xfree(s);
|
203
|
+
}
|
204
|
+
|
205
|
+
static size_t re2_scanner_memsize(const void *ptr) {
|
206
|
+
const re2_scanner *s = reinterpret_cast<const re2_scanner *>(ptr);
|
207
|
+
size_t size = sizeof(*s);
|
208
|
+
if (s->input) {
|
209
|
+
size += sizeof(*s->input);
|
210
|
+
}
|
211
|
+
|
212
|
+
return size;
|
213
|
+
}
|
214
|
+
|
215
|
+
/* Typed-data descriptor for RE2::Scanner objects. */
static const rb_data_type_t re2_scanner_data_type = {
  "RE2::Scanner", /* wrap_struct_name */
  {
    re2_scanner_mark,    /* dmark */
    re2_scanner_free,    /* dfree */
    re2_scanner_memsize, /* dsize */
    re2_compact_callback(re2_scanner_compact) /* dcompact, when available */
  },
  0, /* parent */
  0, /* data */
  // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
  // macro to update VALUE references, as to trigger write barriers.
  RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
};
|
229
|
+
|
230
|
+
static void re2_regexp_free(void *ptr) {
|
231
|
+
re2_pattern *p = reinterpret_cast<re2_pattern *>(ptr);
|
232
|
+
if (p->pattern) {
|
233
|
+
delete p->pattern;
|
234
|
+
}
|
235
|
+
xfree(p);
|
236
|
+
}
|
237
|
+
|
238
|
+
static size_t re2_regexp_memsize(const void *ptr) {
|
239
|
+
const re2_pattern *p = reinterpret_cast<const re2_pattern *>(ptr);
|
240
|
+
size_t size = sizeof(*p);
|
241
|
+
if (p->pattern) {
|
242
|
+
size += sizeof(*p->pattern);
|
243
|
+
}
|
244
|
+
|
245
|
+
return size;
|
246
|
+
}
|
247
|
+
|
248
|
+
/* Typed-data descriptor for RE2::Regexp objects. */
static const rb_data_type_t re2_regexp_data_type = {
  "RE2::Regexp", /* wrap_struct_name */
  {
    0,                  /* dmark: the struct holds no VALUEs to mark */
    re2_regexp_free,    /* dfree */
    re2_regexp_memsize, /* dsize */
  },
  0, /* parent */
  0, /* data */
  // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
  // macro to update VALUE references, as to trigger write barriers.
  RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
};
|
261
|
+
|
262
|
+
/* Allocator for RE2::MatchData: wraps a freshly made re2_matchdata in an
 * instance of `klass`. */
static VALUE re2_matchdata_allocate(VALUE klass) {
  re2_matchdata *data;
  VALUE object = TypedData_Make_Struct(klass, re2_matchdata,
                                       &re2_matchdata_data_type, data);

  return object;
}
|
268
|
+
|
269
|
+
/* Allocator for RE2::Scanner: wraps a freshly made re2_scanner in an
 * instance of `klass`. */
static VALUE re2_scanner_allocate(VALUE klass) {
  re2_scanner *scanner;
  VALUE object = TypedData_Make_Struct(klass, re2_scanner,
                                       &re2_scanner_data_type, scanner);

  return object;
}
|
274
|
+
|
275
|
+
/*
|
276
|
+
* Returns a frozen copy of the text supplied when matching.
|
277
|
+
*
|
278
|
+
* If the text was already a frozen string, returns the original.
|
279
|
+
*
|
280
|
+
* @return [String] a frozen string with the text supplied when matching
|
281
|
+
* @example
|
282
|
+
* m = RE2::Regexp.new('(\d+)').partial_match("bob 123")
|
283
|
+
* m.string #=> "bob 123"
|
284
|
+
*/
|
285
|
+
static VALUE re2_matchdata_string(const VALUE self) {
  re2_matchdata *data;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);
  VALUE text = data->text;

  return text;
}
|
291
|
+
|
292
|
+
/*
|
293
|
+
* Returns the text supplied when incrementally matching with
|
294
|
+
* {RE2::Regexp#scan}.
|
295
|
+
*
|
296
|
+
* @return [String] the original string passed to {RE2::Regexp#scan}
|
297
|
+
* @example
|
298
|
+
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
299
|
+
* c.string #=> "foo"
|
300
|
+
*/
|
301
|
+
static VALUE re2_scanner_string(const VALUE self) {
  re2_scanner *scanner;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);
  VALUE text = scanner->text;

  return text;
}
|
307
|
+
|
308
|
+
/*
|
309
|
+
* Returns whether the {RE2::Scanner} has consumed all input or not.
|
310
|
+
*
|
311
|
+
* @return [Boolean] whether the {RE2::Scanner} has consumed all input or not
|
312
|
+
* @example
|
313
|
+
* c = RE2::Regexp.new('(\d+)').scan("foo")
|
314
|
+
* c.eof? #=> true
|
315
|
+
*/
|
316
|
+
static VALUE re2_scanner_eof(const VALUE self) {
  re2_scanner *scanner;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);
  const bool exhausted = scanner->eof;

  return BOOL2RUBY(exhausted);
}
|
322
|
+
|
323
|
+
/*
|
324
|
+
* Rewind the {RE2::Scanner} to the start of the string.
|
325
|
+
*
|
326
|
+
* @example
|
327
|
+
* s = RE2::Regexp.new('(\d+)').scan("1 2 3")
|
328
|
+
* e = s.to_enum
|
329
|
+
* e.scan #=> ["1"]
|
330
|
+
* e.scan #=> ["2"]
|
331
|
+
* s.rewind
|
332
|
+
* e.scan #=> ["1"]
|
333
|
+
*/
|
334
|
+
static VALUE re2_scanner_rewind(VALUE self) {
  re2_scanner *c;
  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, c);

  /* Drop the partially consumed view and start again from the full text. */
  delete c->input;
  c->input = new(std::nothrow) re2::StringPiece(
      RSTRING_PTR(c->text), RSTRING_LEN(c->text));

  if (c->input == NULL) {
    /* The nothrow allocation failed. Mark the scanner exhausted first so a
     * later scan returns nil instead of dereferencing the null input, then
     * report the failure to Ruby. */
    c->eof = true;
    rb_raise(rb_eNoMemError,
             "not enough memory to allocate StringPiece for input");
  }

  c->eof = false;

  return self;
}
|
345
|
+
|
346
|
+
/*
|
347
|
+
* Scan the given text incrementally for matches using
|
348
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
|
349
|
+
* `FindAndConsume`}, returning an array of submatches on each subsequent
|
350
|
+
* call. Returns `nil` if no matches are found or an empty array for every
|
351
|
+
* match if the pattern has no capturing groups.
|
352
|
+
*
|
353
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
354
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
355
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
356
|
+
*
|
357
|
+
* @return [Array<String>] if the pattern has capturing groups
|
358
|
+
* @return [[]] if the pattern does not have capturing groups
|
359
|
+
* @return [nil] if no matches are found
|
360
|
+
* @example
|
361
|
+
* s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
362
|
+
* s.scan #=> ["Foo"]
|
363
|
+
* s.scan #=> ["bar"]
|
364
|
+
*/
|
365
|
+
static VALUE re2_scanner_scan(VALUE self) {
  re2_pattern *regexp;
  re2_scanner *scanner;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);
  TypedData_Get_Struct(scanner->regexp, re2_pattern, &re2_regexp_data_type, regexp);

  const int group_count = scanner->number_of_capturing_groups;

  std::vector<RE2::Arg> argv(group_count);
  std::vector<RE2::Arg*> args(group_count);
  std::vector<re2::StringPiece> matches(group_count);

  /* Nothing left to scan. */
  if (scanner->eof) {
    return Qnil;
  }

  const re2::StringPiece::size_type size_before = scanner->input->size();

  /* Point each RE2::Arg at the StringPiece that receives its submatch. */
  for (int i = 0; i < group_count; ++i) {
    argv[i] = &matches[i];
    args[i] = &argv[i];
  }

  if (!RE2::FindAndConsumeN(scanner->input, *regexp->pattern, args.data(),
                            group_count)) {
    return Qnil;
  }

  const re2::StringPiece::size_type size_after = scanner->input->size();

  VALUE result = rb_ary_new2(group_count);

  for (int i = 0; i < group_count; ++i) {
    const re2::StringPiece &match = matches[i];

    if (!match.empty()) {
      rb_ary_push(result, encoded_str_new(match.data(), match.size(),
                                          regexp->pattern->options().encoding()));
    } else {
      rb_ary_push(result, Qnil);
    }
  }

  /* The input is exhausted once nothing remains after this match. */
  scanner->eof = (size_after == 0);

  /* A match that consumed nothing would repeat forever, so step past one
   * byte by hand when input remains. */
  if (size_after >= size_before && size_after > 0) {
    scanner->input->remove_prefix(1);
  }

  return result;
}
|
417
|
+
|
418
|
+
/*
 * Resolves `idx` (an Integer index, or a Symbol/String capturing group name)
 * to the corresponding match of `self`.
 *
 * Returns NULL when the name is unknown, the index is out of range, or the
 * resolved match is empty.
 */
static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
  re2_matchdata *data;
  re2_pattern *regexp;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);
  TypedData_Get_Struct(data->regexp, re2_pattern, &re2_regexp_data_type, regexp);

  int index;

  if (FIXNUM_P(idx)) {
    index = FIX2INT(idx);
  } else {
    const bool by_symbol = SYMBOL_P(idx);

    /* Coerce to String before any C++ object is constructed, since the
     * coercion may raise. */
    if (!by_symbol) {
      StringValue(idx);
    }

    const std::string name = by_symbol
        ? std::string(rb_id2name(SYM2ID(idx)))
        : std::string(RSTRING_PTR(idx), RSTRING_LEN(idx));

    const std::map<std::string, int>& groups = regexp->pattern->NamedCapturingGroups();
    std::map<std::string, int>::const_iterator found = groups.find(name);

    if (found == groups.end()) {
      return NULL;
    }

    index = found->second;
  }

  if (index < 0 || index >= data->number_of_matches) {
    return NULL;
  }

  re2::StringPiece *match = &data->matches[index];

  return match->empty() ? NULL : match;
}
|
461
|
+
|
462
|
+
/*
|
463
|
+
* Returns the number of elements in the {RE2::MatchData} (including the
|
464
|
+
* overall match, submatches and any `nils`).
|
465
|
+
*
|
466
|
+
* @return [Integer] the number of elements
|
467
|
+
* @example
|
468
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
469
|
+
* m.size #=> 2
|
470
|
+
* m.length #=> 2
|
471
|
+
*/
|
472
|
+
static VALUE re2_matchdata_size(const VALUE self) {
  re2_matchdata *data;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);
  const int count = data->number_of_matches;

  return INT2FIX(count);
}
|
479
|
+
|
480
|
+
/*
|
481
|
+
* Returns the offset of the start of the nth element of the {RE2::MatchData}.
|
482
|
+
*
|
483
|
+
* @param [Integer, String, Symbol] n the name or number of the submatch
|
484
|
+
* @return [Integer, nil] the offset of the start of the match or `nil` if
|
485
|
+
* there is no such submatch
|
486
|
+
* @example
|
487
|
+
* m = RE2::Regexp.new('ob (\d+)').match("bob 123")
|
488
|
+
* m.begin(0) #=> 1
|
489
|
+
* m.begin(1) #=> 4
|
490
|
+
*/
|
491
|
+
static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
  re2_matchdata *data;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  re2::StringPiece *match = re2_matchdata_find_match(n, self);
  if (match == NULL) {
    return Qnil;
  }

  /* Byte distance from the start of the text, converted to a character
   * offset by rb_str_sublen. */
  long byte_offset = match->data() - RSTRING_PTR(data->text);

  return LONG2NUM(rb_str_sublen(data->text, byte_offset));
}
|
505
|
+
|
506
|
+
/*
|
507
|
+
* Returns the offset of the character following the end of the nth element of
|
508
|
+
* the {RE2::MatchData}.
|
509
|
+
*
|
510
|
+
* @param [Integer, String, Symbol] n the name or number of the match
|
511
|
+
* @return [Integer, nil] the offset of the character following the end of the
|
512
|
+
* match or `nil` if there is no such match
|
513
|
+
* @example
|
514
|
+
* m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob")
|
515
|
+
* m.end(0) #=> 9
|
516
|
+
* m.end(1) #=> 7
|
517
|
+
*/
|
518
|
+
static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
  re2_matchdata *data;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);

  re2::StringPiece *match = re2_matchdata_find_match(n, self);
  if (match == NULL) {
    return Qnil;
  }

  /* Byte position just past the match, converted to a character offset by
   * rb_str_sublen. */
  long byte_offset = (match->data() - RSTRING_PTR(data->text)) + match->size();

  return LONG2NUM(rb_str_sublen(data->text, byte_offset));
}
|
532
|
+
|
533
|
+
/*
|
534
|
+
* Returns the {RE2::Regexp} used in the match.
|
535
|
+
*
|
536
|
+
* @return [RE2::Regexp] the regular expression used in the match
|
537
|
+
* @example
|
538
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
539
|
+
* m.regexp #=> #<RE2::Regexp /(\d+)/>
|
540
|
+
*/
|
541
|
+
static VALUE re2_matchdata_regexp(const VALUE self) {
  re2_matchdata *data;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);
  VALUE regexp = data->regexp;

  return regexp;
}
|
547
|
+
|
548
|
+
/*
|
549
|
+
* Returns the {RE2::Regexp} used in the {RE2::Scanner}.
|
550
|
+
*
|
551
|
+
* @return [RE2::Regexp] the regular expression used in the {RE2::Scanner}
|
552
|
+
* @example
|
553
|
+
* c = RE2::Regexp.new('(\d+)').scan("bob 123")
|
554
|
+
* c.regexp #=> #<RE2::Regexp /(\d+)/>
|
555
|
+
*/
|
556
|
+
static VALUE re2_scanner_regexp(const VALUE self) {
  re2_scanner *scanner;

  TypedData_Get_Struct(self, re2_scanner, &re2_scanner_data_type, scanner);
  VALUE regexp = scanner->regexp;

  return regexp;
}
|
562
|
+
|
563
|
+
/* Allocator for RE2::Regexp: wraps a freshly made re2_pattern in an
 * instance of `klass`. */
static VALUE re2_regexp_allocate(VALUE klass) {
  re2_pattern *regexp;
  VALUE object = TypedData_Make_Struct(klass, re2_pattern,
                                       &re2_regexp_data_type, regexp);

  return object;
}
|
568
|
+
|
569
|
+
/*
|
570
|
+
* Returns the array of matches including the overall match, submatches and any
|
571
|
+
* `nil`s.
|
572
|
+
*
|
573
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
574
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
575
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
576
|
+
*
|
577
|
+
* @return [Array<String, nil>] the array of matches
|
578
|
+
* @example
|
579
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
580
|
+
* m.to_a #=> ["123", "123"]
|
581
|
+
*/
|
582
|
+
static VALUE re2_matchdata_to_a(const VALUE self) {
  re2_matchdata *data;
  re2_pattern *regexp;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);
  TypedData_Get_Struct(data->regexp, re2_pattern, &re2_regexp_data_type, regexp);

  VALUE result = rb_ary_new2(data->number_of_matches);

  for (int i = 0; i < data->number_of_matches; ++i) {
    const re2::StringPiece &match = data->matches[i];

    /* Empty matches are surfaced as nil. */
    if (!match.empty()) {
      rb_ary_push(result, encoded_str_new(match.data(), match.size(),
                                          regexp->pattern->options().encoding()));
    } else {
      rb_ary_push(result, Qnil);
    }
  }

  return result;
}
|
603
|
+
|
604
|
+
/* Returns the `nth` match of `self` as an encoded String, or nil when the
 * index is out of range or the match is empty. */
static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
  re2_matchdata *data;
  re2_pattern *regexp;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);
  TypedData_Get_Struct(data->regexp, re2_pattern, &re2_regexp_data_type, regexp);

  const bool in_range = nth >= 0 && nth < data->number_of_matches;
  if (!in_range) {
    return Qnil;
  }

  const re2::StringPiece &match = data->matches[nth];
  if (match.empty()) {
    return Qnil;
  }

  return encoded_str_new(match.data(), match.size(),
                         regexp->pattern->options().encoding());
}
|
624
|
+
|
625
|
+
/* Returns the match for the capturing group called `name`, or nil when the
 * pattern has no such named group. */
static VALUE re2_matchdata_named_match(const std::string &name, const VALUE self) {
  re2_matchdata *data;
  re2_pattern *regexp;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);
  TypedData_Get_Struct(data->regexp, re2_pattern, &re2_regexp_data_type, regexp);

  const std::map<std::string, int>& groups = regexp->pattern->NamedCapturingGroups();
  std::map<std::string, int>::const_iterator found = groups.find(name);

  if (found == groups.end()) {
    return Qnil;
  }

  return re2_matchdata_nth_match(found->second, self);
}
|
641
|
+
|
642
|
+
/*
|
643
|
+
* Retrieve zero, one or more matches by index or name.
|
644
|
+
*
|
645
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
646
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
647
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
648
|
+
*
|
649
|
+
* @overload [](index)
|
650
|
+
* Access a particular match by index.
|
651
|
+
*
|
652
|
+
* @param [Integer] index the index of the match to fetch
|
653
|
+
* @return [String, nil] the specified match or `nil` if it isn't present
|
654
|
+
* @example
|
655
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
656
|
+
* m[0] #=> "123"
|
657
|
+
*
|
658
|
+
* @overload [](start, length)
|
659
|
+
* Access a range of matches by starting index and length.
|
660
|
+
*
|
661
|
+
* @param [Integer] start the index from which to start
|
662
|
+
* @param [Integer] length the number of elements to fetch
|
663
|
+
* @return [Array<String, nil>] the specified matches
|
664
|
+
* @example
|
665
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
666
|
+
* m[0, 1] #=> ["123"]
|
667
|
+
*
|
668
|
+
* @overload [](range)
|
669
|
+
* Access a range of matches by index.
|
670
|
+
*
|
671
|
+
* @param [Range] range the range of match indexes to fetch
|
672
|
+
* @return [Array<String, nil>] the specified matches
|
673
|
+
* @example
|
674
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
675
|
+
* m[0..1] #=> ["123", "123"]
|
676
|
+
*
|
677
|
+
* @overload [](name)
|
678
|
+
* Access a particular match by name.
|
679
|
+
*
|
680
|
+
* @param [String, Symbol] name the name of the match to fetch
|
681
|
+
* @return [String, nil] the specific match or `nil` if it isn't present
|
682
|
+
* @example
|
683
|
+
* m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123")
|
684
|
+
* m["number"] #=> "123"
|
685
|
+
* m[:number] #=> "123"
|
686
|
+
*/
|
687
|
+
static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
  VALUE idx, rest;
  rb_scan_args(argc, argv, "11", &idx, &rest);

  /* Lookup by capturing group name. */
  if (TYPE(idx) == T_STRING) {
    return re2_matchdata_named_match(
        std::string(RSTRING_PTR(idx), RSTRING_LEN(idx)), self);
  }

  if (SYMBOL_P(idx)) {
    return re2_matchdata_named_match(rb_id2name(SYM2ID(idx)), self);
  }

  /* A single non-negative Integer index is served directly. */
  if (NIL_P(rest) && FIXNUM_P(idx) && FIX2INT(idx) >= 0) {
    return re2_matchdata_nth_match(FIX2INT(idx), self);
  }

  /* Everything else (start/length pairs, ranges, negative indexes) is
   * delegated to Array#[] on the full array of matches. */
  return rb_ary_aref(argc, argv, re2_matchdata_to_a(self));
}
|
702
|
+
|
703
|
+
/*
|
704
|
+
* Returns the entire matched string.
|
705
|
+
*
|
706
|
+
* @return [String] the entire matched string
|
707
|
+
*/
|
708
|
+
static VALUE re2_matchdata_to_s(const VALUE self) {
  /* Index 0 is the overall match. */
  VALUE whole_match = re2_matchdata_nth_match(0, self);

  return whole_match;
}
|
711
|
+
|
712
|
+
/*
|
713
|
+
* Returns a printable version of the match.
|
714
|
+
*
|
715
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
716
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
717
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
718
|
+
*
|
719
|
+
* @return [String] a printable version of the match
|
720
|
+
* @example
|
721
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
722
|
+
* m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
|
723
|
+
*/
|
724
|
+
static VALUE re2_matchdata_inspect(const VALUE self) {
  re2_matchdata *m;
  re2_pattern *p;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, m);
  TypedData_Get_Struct(m->regexp, re2_pattern, &re2_regexp_data_type, p);

  std::ostringstream output;
  output << "#<RE2::MatchData";

  for (int i = 0; i < m->number_of_matches; ++i) {
    output << " ";

    /* Submatches are prefixed with their index; the overall match is not. */
    if (i > 0) {
      output << i << ":";
    }

    VALUE match = re2_matchdata_nth_match(i, self);

    if (NIL_P(match)) {
      output << "nil";
    } else {
      output << "\"";
      output.write(RSTRING_PTR(match), RSTRING_LEN(match));
      output << "\"";
    }
  }

  output << ">";

  /* str() returns a fresh copy of the buffer on every call, so take it once
   * rather than once for data() and again for length(). */
  const std::string inspected = output.str();

  return encoded_str_new(inspected.data(), inspected.length(),
                         p->pattern->options().encoding());
}
|
757
|
+
|
758
|
+
/*
|
759
|
+
* Returns the array of submatches for pattern matching.
|
760
|
+
*
|
761
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
762
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
763
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is
|
764
|
+
* undefined).
|
765
|
+
*
|
766
|
+
* @return [Array<String, nil>] the array of submatches
|
767
|
+
* @example
|
768
|
+
* m = RE2::Regexp.new('(\d+)').match("bob 123")
|
769
|
+
* m.deconstruct #=> ["123"]
|
770
|
+
*
|
771
|
+
* @example pattern matching
|
772
|
+
* case RE2::Regexp.new('(\d+) (\d+)').match("bob 123 456")
|
773
|
+
* in x, y
|
774
|
+
* puts "Matched #{x} #{y}"
|
775
|
+
* else
|
776
|
+
* puts "Unrecognised match"
|
777
|
+
* end
|
778
|
+
*/
|
779
|
+
static VALUE re2_matchdata_deconstruct(const VALUE self) {
  re2_matchdata *data;
  re2_pattern *regexp;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, data);
  TypedData_Get_Struct(data->regexp, re2_pattern, &re2_regexp_data_type, regexp);

  VALUE submatches = rb_ary_new2(data->number_of_matches - 1);

  /* Start at 1 to leave out the overall match at index 0. */
  for (int i = 1; i < data->number_of_matches; ++i) {
    const re2::StringPiece &match = data->matches[i];

    if (!match.empty()) {
      rb_ary_push(submatches, encoded_str_new(match.data(), match.size(),
                                              regexp->pattern->options().encoding()));
    } else {
      rb_ary_push(submatches, Qnil);
    }
  }

  return submatches;
}
|
800
|
+
|
801
|
+
/*
|
802
|
+
* Returns a hash of capturing group names to submatches for pattern matching.
|
803
|
+
*
|
804
|
+
* As this is used by Ruby's pattern matching, it will return an empty hash if given
|
805
|
+
* more keys than there are capturing groups. Given keys will populate the hash in
|
806
|
+
* order but an invalid name will cause the hash to be immediately returned.
|
807
|
+
*
|
808
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
809
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
810
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
811
|
+
*
|
812
|
+
* @return [Hash] a hash of capturing group names to submatches
|
813
|
+
* @param [Array<Symbol>, nil] keys an array of `Symbol` capturing group names
|
814
|
+
* or `nil` to return all names
|
815
|
+
* @example
|
816
|
+
* m = RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
817
|
+
* m.deconstruct_keys(nil) #=> {numbers: "123", letters: "abc"}
|
818
|
+
* m.deconstruct_keys([:numbers]) #=> {numbers: "123"}
|
819
|
+
* m.deconstruct_keys([:fruit]) #=> {}
|
820
|
+
* m.deconstruct_keys([:letters, :fruit]) #=> {letters: "abc"}
|
821
|
+
*
|
822
|
+
* @example pattern matching
|
823
|
+
* case RE2::Regexp.new('(?P<numbers>\d+) (?P<letters>[a-zA-Z]+)').match('123 abc')
|
824
|
+
* in numbers:, letters:
|
825
|
+
* puts "Numbers: #{numbers}, letters: #{letters}"
|
826
|
+
* else
|
827
|
+
* puts "Unrecognised match"
|
828
|
+
* end
|
829
|
+
*/
|
830
|
+
static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys) {
  typedef std::map<std::string, int> group_map;

  re2_matchdata *md;
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_matchdata, &re2_matchdata_data_type, md);
  TypedData_Get_Struct(md->regexp, re2_pattern, &re2_regexp_data_type, re);

  const group_map &named_groups = re->pattern->NamedCapturingGroups();
  VALUE result = rb_hash_new();

  /* nil means "give me everything": emit one entry per named group. */
  if (NIL_P(keys)) {
    for (group_map::const_iterator group = named_groups.begin();
         group != named_groups.end(); ++group) {
      rb_hash_aset(result,
                   ID2SYM(rb_intern(group->first.data())),
                   re2_matchdata_nth_match(group->second, self));
    }

    return result;
  }

  Check_Type(keys, T_ARRAY);

  /* Asked for more keys than there are capturing groups: answer with the
   * still-empty hash. */
  if (re->pattern->NumberOfCapturingGroups() < RARRAY_LEN(keys)) {
    return result;
  }

  for (int i = 0; i < RARRAY_LEN(keys); ++i) {
    VALUE key = rb_ary_entry(keys, i);
    Check_Type(key, T_SYMBOL);

    const char *name = rb_id2name(SYM2ID(key));
    group_map::const_iterator found = named_groups.find(name);

    /* Stop at the first unknown name, keeping what was collected so far. */
    if (found == named_groups.end()) {
      break;
    }

    rb_hash_aset(result, key, re2_matchdata_nth_match(found->second, self));
  }

  return result;
}
|
867
|
+
|
868
|
+
/*
|
869
|
+
* Shorthand to compile a new {RE2::Regexp}.
|
870
|
+
*
|
871
|
+
* @see RE2::Regexp#initialize
|
872
|
+
*/
|
873
|
+
static VALUE re2_re2(int argc, VALUE *argv, VALUE) {
  /* Hand every argument, untouched, to the re2_cRegexp constructor. */
  VALUE regexp = rb_class_new_instance(argc, argv, re2_cRegexp);

  return regexp;
}
|
876
|
+
|
877
|
+
/*
|
878
|
+
* Returns a new {RE2::Regexp} object with a compiled version of
|
879
|
+
* `pattern` stored inside.
|
880
|
+
*
|
881
|
+
* @overload initialize(pattern)
|
882
|
+
* Returns a new {RE2::Regexp} object with a compiled version of
|
883
|
+
* `pattern` stored inside with the default options.
|
884
|
+
*
|
885
|
+
* @param [String] pattern the pattern to compile
|
886
|
+
* @return [RE2::Regexp] a {RE2::Regexp} with the specified pattern
|
887
|
+
* @raise [TypeError] if the given pattern can't be coerced to a `String`
|
888
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled
|
889
|
+
* pattern
|
890
|
+
*
|
891
|
+
* @overload initialize(pattern, options)
|
892
|
+
* Returns a new {RE2::Regexp} object with a compiled version of
|
893
|
+
* `pattern` stored inside with the specified options.
|
894
|
+
*
|
895
|
+
* @param [String] pattern the pattern to compile
|
896
|
+
* @param [Hash] options the options with which to compile the pattern
|
897
|
+
* @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
|
898
|
+
* @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
|
899
|
+
* @option options [Boolean] :longest_match (false) search for longest match, not first match
|
900
|
+
* @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
|
901
|
+
* @option options [Integer] :max_mem approx. max memory footprint of RE2
|
902
|
+
* @option options [Boolean] :literal (false) interpret string as literal, not regexp
|
903
|
+
* @option options [Boolean] :never_nl (false) never match `\n`, even if it is in regexp
|
904
|
+
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with `(?i)` unless in `posix_syntax` mode)
|
905
|
+
* @option options [Boolean] :perl_classes (false) allow Perl's `\d` `\s` `\w` `\D` `\S` `\W` when in `posix_syntax` mode
|
906
|
+
* @option options [Boolean] :word_boundary (false) allow `\b` `\B` (word boundary and not) when in `posix_syntax` mode
|
907
|
+
* @option options [Boolean] :one_line (false) `^` and `$` only match beginning and end of text when in `posix_syntax` mode
|
908
|
+
* @return [RE2::Regexp] a {RE2::Regexp} with the specified pattern and options
|
909
|
+
* @raise [TypeError] if the given pattern can't be coerced to a `String`
|
910
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
911
|
+
*/
|
912
|
+
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
  VALUE pattern, options;
  re2_pattern *re;

  /* One required argument (the pattern) and one optional (the options). */
  rb_scan_args(argc, argv, "11", &pattern, &options);

  /* Coerce the pattern to a String before reading its bytes. */
  StringValue(pattern);

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  if (RTEST(options)) {
    RE2::Options compile_options;
    parse_re2_options(&compile_options, options);

    /* The view over the pattern bytes is taken only after the options have
     * been parsed, exactly as the pointer was read before. */
    const re2::StringPiece source(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
    re->pattern = new(std::nothrow) RE2(source, compile_options);
  } else {
    const re2::StringPiece source(RSTRING_PTR(pattern), RSTRING_LEN(pattern));
    re->pattern = new(std::nothrow) RE2(source);
  }

  /* nothrow new reports allocation failure as a null pointer. */
  if (re->pattern == 0) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
  }

  return self;
}
|
940
|
+
|
941
|
+
/*
|
942
|
+
* Returns a printable version of the regular expression.
|
943
|
+
*
|
944
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
945
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
946
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is
|
947
|
+
* undefined).
|
948
|
+
*
|
949
|
+
* @return [String] a printable version of the regular expression
|
950
|
+
* @example
|
951
|
+
* re2 = RE2::Regexp.new("woo?")
|
952
|
+
* re2.inspect #=> "#<RE2::Regexp /woo?/>"
|
953
|
+
*/
|
954
|
+
static VALUE re2_regexp_inspect(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* Wrap the pattern source as "#<RE2::Regexp /.../>". */
  std::string description("#<RE2::Regexp /");
  description += re->pattern->pattern();
  description += "/>";

  return encoded_str_new(description.data(), description.length(),
                         re->pattern->options().encoding());
}
|
966
|
+
|
967
|
+
/*
|
968
|
+
* Returns a string version of the regular expression.
|
969
|
+
*
|
970
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
971
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
972
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
973
|
+
*
|
974
|
+
* @return [String] a string version of the regular expression
|
975
|
+
* @example
|
976
|
+
* re2 = RE2::Regexp.new("woo?")
|
977
|
+
* re2.to_s #=> "woo?"
|
978
|
+
*/
|
979
|
+
static VALUE re2_regexp_to_s(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* The pattern source, tagged with the encoding the regexp was compiled with. */
  const std::string &source = re->pattern->pattern();

  return encoded_str_new(source.data(), source.size(),
                         re->pattern->options().encoding());
}
|
987
|
+
|
988
|
+
/*
|
989
|
+
* Returns whether or not the regular expression was compiled successfully.
|
990
|
+
*
|
991
|
+
* @return [Boolean] whether or not compilation was successful
|
992
|
+
* @example
|
993
|
+
* re2 = RE2::Regexp.new("woo?")
|
994
|
+
* re2.ok? #=> true
|
995
|
+
*/
|
996
|
+
static VALUE re2_regexp_ok(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* Report RE2's own ok() flag as a Ruby boolean. */
  const bool compiled = re->pattern->ok();

  return compiled ? Qtrue : Qfalse;
}
|
1002
|
+
|
1003
|
+
/*
|
1004
|
+
* Returns whether or not the regular expression was compiled with the `utf8`
|
1005
|
+
* option set to `true`.
|
1006
|
+
*
|
1007
|
+
* @return [Boolean] the `utf8` option
|
1008
|
+
* @example
|
1009
|
+
* re2 = RE2::Regexp.new("woo?", utf8: true)
|
1010
|
+
* re2.utf8? #=> true
|
1011
|
+
*/
|
1012
|
+
static VALUE re2_regexp_utf8(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* True only when the compiled encoding is RE2's UTF-8 one. */
  const RE2::Options &opts = re->pattern->options();
  const bool is_utf8 = opts.encoding() == RE2::Options::EncodingUTF8;

  return is_utf8 ? Qtrue : Qfalse;
}
|
1018
|
+
|
1019
|
+
/*
|
1020
|
+
* Returns whether or not the regular expression was compiled with the
|
1021
|
+
* `posix_syntax` option set to `true`.
|
1022
|
+
*
|
1023
|
+
* @return [Boolean] the `posix_syntax` option
|
1024
|
+
* @example
|
1025
|
+
* re2 = RE2::Regexp.new("woo?", posix_syntax: true)
|
1026
|
+
* re2.posix_syntax? #=> true
|
1027
|
+
*/
|
1028
|
+
static VALUE re2_regexp_posix_syntax(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* Read the flag back from the options stored in the compiled pattern. */
  const RE2::Options &opts = re->pattern->options();

  return opts.posix_syntax() ? Qtrue : Qfalse;
}
|
1034
|
+
|
1035
|
+
/*
|
1036
|
+
* Returns whether or not the regular expression was compiled with the
|
1037
|
+
* `longest_match` option set to `true`.
|
1038
|
+
*
|
1039
|
+
* @return [Boolean] the `longest_match` option
|
1040
|
+
* @example
|
1041
|
+
* re2 = RE2::Regexp.new("woo?", longest_match: true)
|
1042
|
+
* re2.longest_match? #=> true
|
1043
|
+
*/
|
1044
|
+
static VALUE re2_regexp_longest_match(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* Read the flag back from the options stored in the compiled pattern. */
  const bool longest = re->pattern->options().longest_match();

  return BOOL2RUBY(longest);
}
|
1050
|
+
|
1051
|
+
/*
|
1052
|
+
* Returns whether or not the regular expression was compiled with the
|
1053
|
+
* `log_errors` option set to `true`.
|
1054
|
+
*
|
1055
|
+
* @return [Boolean] the `log_errors` option
|
1056
|
+
* @example
|
1057
|
+
* re2 = RE2::Regexp.new("woo?", log_errors: true)
|
1058
|
+
* re2.log_errors? #=> true
|
1059
|
+
*/
|
1060
|
+
static VALUE re2_regexp_log_errors(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* Read the flag back from the options stored in the compiled pattern. */
  const RE2::Options &opts = re->pattern->options();

  return opts.log_errors() ? Qtrue : Qfalse;
}
|
1066
|
+
|
1067
|
+
/*
|
1068
|
+
* Returns the `max_mem` setting for the regular expression.
|
1069
|
+
*
|
1070
|
+
* @return [Integer] the `max_mem` option
|
1071
|
+
* @example
|
1072
|
+
* re2 = RE2::Regexp.new("woo?", max_mem: 1024)
|
1073
|
+
* re2.max_mem #=> 1024
|
1074
|
+
*/
|
1075
|
+
static VALUE re2_regexp_max_mem(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* max_mem() is a 64-bit quantity. INT2FIX only encodes values that fit in
   * a Fixnum and silently mangles anything larger (notably where long is 32
   * bits wide), so convert with LL2NUM, which still yields a Fixnum whenever
   * the value fits and falls back to a Bignum otherwise. */
  return LL2NUM(re->pattern->options().max_mem());
}
|
1081
|
+
|
1082
|
+
/*
|
1083
|
+
* Returns whether or not the regular expression was compiled with the
|
1084
|
+
* `literal` option set to `true`.
|
1085
|
+
*
|
1086
|
+
* @return [Boolean] the `literal` option
|
1087
|
+
* @example
|
1088
|
+
* re2 = RE2::Regexp.new("woo?", literal: true)
|
1089
|
+
* re2.literal? #=> true
|
1090
|
+
*/
|
1091
|
+
static VALUE re2_regexp_literal(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* Read the flag back from the options stored in the compiled pattern. */
  const bool is_literal = re->pattern->options().literal();

  return BOOL2RUBY(is_literal);
}
|
1097
|
+
|
1098
|
+
/*
|
1099
|
+
* Returns whether or not the regular expression was compiled with the
|
1100
|
+
* `never_nl` option set to `true`.
|
1101
|
+
*
|
1102
|
+
* @return [Boolean] the `never_nl` option
|
1103
|
+
* @example
|
1104
|
+
* re2 = RE2::Regexp.new("woo?", never_nl: true)
|
1105
|
+
* re2.never_nl? #=> true
|
1106
|
+
*/
|
1107
|
+
static VALUE re2_regexp_never_nl(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* Read the flag back from the options stored in the compiled pattern. */
  const RE2::Options &opts = re->pattern->options();

  return opts.never_nl() ? Qtrue : Qfalse;
}
|
1113
|
+
|
1114
|
+
/*
|
1115
|
+
* Returns whether or not the regular expression was compiled with the
|
1116
|
+
* `case_sensitive` option set to `true`.
|
1117
|
+
*
|
1118
|
+
* @return [Boolean] the `case_sensitive` option
|
1119
|
+
* @example
|
1120
|
+
* re2 = RE2::Regexp.new("woo?", case_sensitive: true)
|
1121
|
+
* re2.case_sensitive? #=> true
|
1122
|
+
*/
|
1123
|
+
static VALUE re2_regexp_case_sensitive(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* Read the flag back from the options stored in the compiled pattern. */
  const bool sensitive = re->pattern->options().case_sensitive();

  return BOOL2RUBY(sensitive);
}
|
1129
|
+
|
1130
|
+
/*
|
1131
|
+
* Returns whether or not the regular expression was compiled with the
|
1132
|
+
* `case_sensitive` option set to `false`.
|
1133
|
+
*
|
1134
|
+
* @return [Boolean] the inverse of the `case_sensitive` option
|
1135
|
+
* @example
|
1136
|
+
* re2 = RE2::Regexp.new("woo?", case_sensitive: true)
|
1137
|
+
* re2.case_insensitive? #=> false
|
1138
|
+
* re2.casefold? #=> false
|
1139
|
+
*/
|
1140
|
+
static VALUE re2_regexp_case_insensitive(const VALUE self) {
  /* Simply the negation of case_sensitive?. */
  if (re2_regexp_case_sensitive(self) == Qtrue) {
    return Qfalse;
  }

  return Qtrue;
}
|
1143
|
+
|
1144
|
+
/*
|
1145
|
+
* Returns whether or not the regular expression was compiled with the
|
1146
|
+
* `perl_classes` option set to `true`.
|
1147
|
+
*
|
1148
|
+
* @return [Boolean] the `perl_classes` option
|
1149
|
+
* @example
|
1150
|
+
* re2 = RE2::Regexp.new("woo?", perl_classes: true)
|
1151
|
+
* re2.perl_classes? #=> true
|
1152
|
+
*/
|
1153
|
+
static VALUE re2_regexp_perl_classes(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* Read the flag back from the options stored in the compiled pattern. */
  const RE2::Options &opts = re->pattern->options();

  return opts.perl_classes() ? Qtrue : Qfalse;
}
|
1159
|
+
|
1160
|
+
/*
|
1161
|
+
* Returns whether or not the regular expression was compiled with the
|
1162
|
+
* `word_boundary` option set to `true`.
|
1163
|
+
*
|
1164
|
+
* @return [Boolean] the `word_boundary` option
|
1165
|
+
* @example
|
1166
|
+
* re2 = RE2::Regexp.new("woo?", word_boundary: true)
|
1167
|
+
* re2.word_boundary? #=> true
|
1168
|
+
*/
|
1169
|
+
static VALUE re2_regexp_word_boundary(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* Read the flag back from the options stored in the compiled pattern. */
  const bool allowed = re->pattern->options().word_boundary();

  return BOOL2RUBY(allowed);
}
|
1175
|
+
|
1176
|
+
/*
|
1177
|
+
* Returns whether or not the regular expression was compiled with the
|
1178
|
+
* `one_line` option set to `true`.
|
1179
|
+
*
|
1180
|
+
* @return [Boolean] the `one_line` option
|
1181
|
+
* @example
|
1182
|
+
* re2 = RE2::Regexp.new("woo?", one_line: true)
|
1183
|
+
* re2.one_line? #=> true
|
1184
|
+
*/
|
1185
|
+
static VALUE re2_regexp_one_line(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* Read the flag back from the options stored in the compiled pattern. */
  const RE2::Options &opts = re->pattern->options();

  return opts.one_line() ? Qtrue : Qfalse;
}
|
1191
|
+
|
1192
|
+
/*
|
1193
|
+
* If the {RE2::Regexp} could not be created properly, returns an error string
|
1194
|
+
* otherwise returns `nil`.
|
1195
|
+
*
|
1196
|
+
* @return [String, nil] the error string or `nil`
|
1197
|
+
*/
|
1198
|
+
static VALUE re2_regexp_error(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* A pattern that compiled cleanly has no error to report. */
  if (re->pattern->ok()) {
    return Qnil;
  }

  const std::string &message = re->pattern->error();

  return rb_str_new(message.data(), message.size());
}
|
1208
|
+
|
1209
|
+
/*
|
1210
|
+
* If the {RE2::Regexp} could not be created properly, returns
|
1211
|
+
* the offending portion of the regexp otherwise returns `nil`.
|
1212
|
+
*
|
1213
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1214
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1215
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1216
|
+
*
|
1217
|
+
* @return [String, nil] the offending portion of the regexp or `nil`
|
1218
|
+
*/
|
1219
|
+
static VALUE re2_regexp_error_arg(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* A pattern that compiled cleanly has no offending fragment. */
  if (re->pattern->ok()) {
    return Qnil;
  }

  const std::string &fragment = re->pattern->error_arg();

  return encoded_str_new(fragment.data(), fragment.size(),
                         re->pattern->options().encoding());
}
|
1231
|
+
|
1232
|
+
/*
|
1233
|
+
* Returns the program size, a very approximate measure
|
1234
|
+
* of a regexp's "cost". Larger numbers are more expensive
|
1235
|
+
* than smaller numbers.
|
1236
|
+
*
|
1237
|
+
* @return [Integer] the regexp "cost"
|
1238
|
+
*/
|
1239
|
+
static VALUE re2_regexp_program_size(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* Whatever RE2 reports as the program size, as a Ruby Integer. */
  const int size = re->pattern->ProgramSize();

  return INT2FIX(size);
}
|
1245
|
+
|
1246
|
+
/*
|
1247
|
+
* Returns a hash of the options currently set for the {RE2::Regexp}.
|
1248
|
+
*
|
1249
|
+
* @return [Hash] the options
|
1250
|
+
*/
|
1251
|
+
static VALUE re2_regexp_options(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* Bind the options once rather than re-fetching them for every key. */
  const RE2::Options &opts = re->pattern->options();
  VALUE result = rb_hash_new();

  rb_hash_aset(result, ID2SYM(id_utf8),
               BOOL2RUBY(opts.encoding() == RE2::Options::EncodingUTF8));
  rb_hash_aset(result, ID2SYM(id_posix_syntax),
               BOOL2RUBY(opts.posix_syntax()));
  rb_hash_aset(result, ID2SYM(id_longest_match),
               BOOL2RUBY(opts.longest_match()));
  rb_hash_aset(result, ID2SYM(id_log_errors),
               BOOL2RUBY(opts.log_errors()));

  /* max_mem() is a 64-bit quantity. INT2FIX only encodes values that fit in
   * a Fixnum and silently mangles anything larger (notably where long is 32
   * bits wide); LL2NUM still returns a Fixnum whenever the value fits. */
  rb_hash_aset(result, ID2SYM(id_max_mem),
               LL2NUM(opts.max_mem()));

  rb_hash_aset(result, ID2SYM(id_literal),
               BOOL2RUBY(opts.literal()));
  rb_hash_aset(result, ID2SYM(id_never_nl),
               BOOL2RUBY(opts.never_nl()));
  rb_hash_aset(result, ID2SYM(id_case_sensitive),
               BOOL2RUBY(opts.case_sensitive()));
  rb_hash_aset(result, ID2SYM(id_perl_classes),
               BOOL2RUBY(opts.perl_classes()));
  rb_hash_aset(result, ID2SYM(id_word_boundary),
               BOOL2RUBY(opts.word_boundary()));
  rb_hash_aset(result, ID2SYM(id_one_line),
               BOOL2RUBY(opts.one_line()));

  /* The hash is a snapshot, so freeze it before handing it out. */
  rb_obj_freeze(result);

  return result;
}
|
1295
|
+
|
1296
|
+
/*
|
1297
|
+
* Returns the number of capturing subpatterns, or -1 if the regexp
|
1298
|
+
* wasn't valid on construction. The overall match (`$0`) does not
|
1299
|
+
* count: if the regexp is `"(a)(b)"`, returns 2.
|
1300
|
+
*
|
1301
|
+
* @return [Integer] the number of capturing subpatterns
|
1302
|
+
*/
|
1303
|
+
static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* RE2's own count, passed through as a Ruby Integer. */
  const int groups = re->pattern->NumberOfCapturingGroups();

  return INT2FIX(groups);
}
|
1309
|
+
|
1310
|
+
/*
|
1311
|
+
* Returns a hash of names to capturing indices of groups.
|
1312
|
+
*
|
1313
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1314
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1315
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1316
|
+
*
|
1317
|
+
* @return [Hash] a hash of names to capturing indices
|
1318
|
+
*/
|
1319
|
+
static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
  typedef std::map<std::string, int> group_map;

  re2_pattern *re;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  const group_map &named_groups = re->pattern->NamedCapturingGroups();
  VALUE result = rb_hash_new();

  /* One entry per named group: encoded name string => capture index. */
  group_map::const_iterator group = named_groups.begin();
  while (group != named_groups.end()) {
    VALUE name = encoded_str_new(group->first.data(), group->first.size(),
                                 re->pattern->options().encoding());

    rb_hash_aset(result, name, INT2FIX(group->second));
    ++group;
  }

  return result;
}
|
1335
|
+
|
1336
|
+
/*
|
1337
|
+
* General matching: match the pattern against the given `text` using
|
1338
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
|
1339
|
+
* `Match`} and return a {RE2::MatchData} instance with the specified number of
|
1340
|
+
* submatches (defaults to the total number of capturing groups) or a boolean
|
1341
|
+
* (if no submatches are required).
|
1342
|
+
*
|
1343
|
+
* The number of submatches has a significant impact on performance: requesting
|
1344
|
+
* one submatch is much faster than requesting more than one and requesting
|
1345
|
+
* zero submatches is faster still.
|
1346
|
+
*
|
1347
|
+
* @overload match(text)
|
1348
|
+
* Returns a {RE2::MatchData} containing the matching pattern and all
|
1349
|
+
* submatches resulting from looking for the regexp in `text` if the pattern
|
1350
|
+
* contains capturing groups.
|
1351
|
+
*
|
1352
|
+
* Returns either `true` or `false` indicating whether a successful match was
|
1353
|
+
* made if the pattern contains no capturing groups.
|
1354
|
+
*
|
1355
|
+
* @param [String] text the text to search
|
1356
|
+
* @return [RE2::MatchData, nil] if the pattern contains capturing groups
|
1357
|
+
* @return [Boolean] if the pattern does not contain capturing groups
|
1358
|
+
* @raise [NoMemoryError] if there was not enough memory to allocate the submatches
|
1359
|
+
* @raise [TypeError] if given text that cannot be coerced to a `String`
|
1360
|
+
* @example Matching with capturing groups
|
1361
|
+
* r = RE2::Regexp.new('w(o)(o)')
|
1362
|
+
* r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
1363
|
+
* @example Matching without capturing groups
|
1364
|
+
* r = RE2::Regexp.new('woo')
|
1365
|
+
* r.match('woo') #=> true
|
1366
|
+
*
|
1367
|
+
* @overload match(text, options)
|
1368
|
+
* See `match(text)` but with customisable offsets for starting and ending
|
1369
|
+
* matches, optional anchoring to the start or both ends of the text and a
|
1370
|
+
* specific number of submatches to extract (padded with `nil`s if
|
1371
|
+
* necessary).
|
1372
|
+
*
|
1373
|
+
* @param [String] text the text to search
|
1374
|
+
* @param [Hash] options the options with which to perform the match
|
1375
|
+
* @option options [Integer] :startpos (0) offset at which to start matching
|
1376
|
+
* @option options [Integer] :endpos offset at which to stop matching, defaults to the text length
|
1377
|
+
* @option options [Symbol] :anchor (:unanchored) one of :unanchored, :anchor_start, :anchor_both to anchor the match
|
1378
|
+
* @option options [Integer] :submatches how many submatches to extract (0 is
|
1379
|
+
* fastest), defaults to the number of capturing groups
|
1380
|
+
* @return [RE2::MatchData, nil] if extracting any submatches
|
1381
|
+
* @return [Boolean] if not extracting any submatches
|
1382
|
+
* @raise [ArgumentError] if given a negative number of submatches, invalid
|
1383
|
+
* anchor or invalid startpos, endpos pair
|
1384
|
+
* @raise [NoMemoryError] if there was not enough memory to allocate the matches
|
1385
|
+
* @raise [TypeError] if given non-String text, non-numeric number of
|
1386
|
+
* submatches, non-symbol anchor or non-hash options
|
1387
|
+
* @raise [RE2::Regexp::UnsupportedError] if given an endpos argument on a
|
1388
|
+
* version of RE2 that does not support it
|
1389
|
+
* @example Matching with capturing groups
|
1390
|
+
* r = RE2::Regexp.new('w(o)(o)')
|
1391
|
+
* r.match('woo', submatches: 1) #=> #<RE2::MatchData "woo" 1:"o">
|
1392
|
+
* r.match('woo', submatches: 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
|
1393
|
+
* r.match('woot', anchor: :anchor_both, submatches: 0)
|
1394
|
+
* #=> false
|
1395
|
+
* r.match('woot', anchor: :anchor_start, submatches: 0)
|
1396
|
+
* #=> true
|
1397
|
+
* @example Matching without capturing groups
|
1398
|
+
* r = RE2::Regexp.new('wo+')
|
1399
|
+
* r.match('woot', anchor: :anchor_both) #=> false
|
1400
|
+
* r.match('woot', anchor: :anchor_start) #=> true
|
1401
|
+
*
|
1402
|
+
* @overload match(text, submatches)
|
1403
|
+
* @deprecated Legacy syntax for matching against `text` with a specific
|
1404
|
+
* number of submatches to extract. Use `match(text, submatches: n)` instead.
|
1405
|
+
*
|
1406
|
+
* @param [String] text the text to search
|
1407
|
+
* @param [Integer] submatches the number of submatches to extract
|
1408
|
+
* @return [RE2::MatchData, nil] if extracting any submatches
|
1409
|
+
* @return [Boolean] if not extracting any submatches
|
1410
|
+
* @raise [NoMemoryError] if there was not enough memory to allocate the submatches
|
1411
|
+
* @raise [TypeError] if given non-numeric number of submatches
|
1412
|
+
* @example
|
1413
|
+
* r = RE2::Regexp.new('w(o)(o)')
|
1414
|
+
* r.match('woo', 0) #=> true
|
1415
|
+
* r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
|
1416
|
+
* r.match('woo', 2) #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
|
1417
|
+
*/
|
1418
|
+
static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
  re2_pattern *re;
  re2_matchdata *md;
  VALUE text, options;

  rb_scan_args(argc, argv, "11", &text, &options);

  /* Coerce text to a String before reading its length below. */
  StringValue(text);

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  /* Defaults: search the whole text, unanchored. */
  int submatches;
  int startpos = 0;
  int endpos = RSTRING_LEN(text);
  RE2::Anchor anchor = RE2::UNANCHORED;

  if (!RTEST(options)) {
    /* No second argument: extract every capturing group. */
    if (!re->pattern->ok()) {
      return Qnil;
    }

    submatches = re->pattern->NumberOfCapturingGroups();
  } else if (FIXNUM_P(options)) {
    /* Legacy form: match(text, submatches). */
    submatches = NUM2INT(options);

    if (submatches < 0) {
      rb_raise(rb_eArgError, "number of matches should be >= 0");
    }
  } else {
    /* Anything else must be (or convert to) an options hash. */
    if (TYPE(options) != T_HASH) {
      options = rb_Hash(options);
    }

    VALUE endpos_option = rb_hash_aref(options, ID2SYM(id_endpos));
    if (!NIL_P(endpos_option)) {
#ifdef HAVE_ENDPOS_ARGUMENT
      Check_Type(endpos_option, T_FIXNUM);

      endpos = NUM2INT(endpos_option);

      if (endpos < 0) {
        rb_raise(rb_eArgError, "endpos should be >= 0");
      }
#else
      rb_raise(re2_eRegexpUnsupportedError, "current version of RE2::Match() does not support endpos argument");
#endif
    }

    VALUE anchor_option = rb_hash_aref(options, ID2SYM(id_anchor));
    if (!NIL_P(anchor_option)) {
      Check_Type(anchor_option, T_SYMBOL);

      const ID requested_anchor = SYM2ID(anchor_option);
      if (requested_anchor == id_unanchored) {
        anchor = RE2::UNANCHORED;
      } else if (requested_anchor == id_anchor_start) {
        anchor = RE2::ANCHOR_START;
      } else if (requested_anchor == id_anchor_both) {
        anchor = RE2::ANCHOR_BOTH;
      } else {
        rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
      }
    }

    VALUE submatches_option = rb_hash_aref(options, ID2SYM(id_submatches));
    if (NIL_P(submatches_option)) {
      /* No explicit count: fall back to every capturing group. */
      if (!re->pattern->ok()) {
        return Qnil;
      }

      submatches = re->pattern->NumberOfCapturingGroups();
    } else {
      Check_Type(submatches_option, T_FIXNUM);

      submatches = NUM2INT(submatches_option);

      if (submatches < 0) {
        rb_raise(rb_eArgError, "number of matches should be >= 0");
      }
    }

    VALUE startpos_option = rb_hash_aref(options, ID2SYM(id_startpos));
    if (!NIL_P(startpos_option)) {
      Check_Type(startpos_option, T_FIXNUM);

      startpos = NUM2INT(startpos_option);

      if (startpos < 0) {
        rb_raise(rb_eArgError, "startpos should be >= 0");
      }
    }
  }

  if (startpos > endpos) {
    rb_raise(rb_eArgError, "startpos should be <= endpos");
  }

  if (submatches == 0) {
    /* Nothing to capture: answer with a plain boolean and build no MatchData. */
    const re2::StringPiece haystack(RSTRING_PTR(text), RSTRING_LEN(text));
#ifdef HAVE_ENDPOS_ARGUMENT
    bool matched = re->pattern->Match(haystack, startpos, endpos, anchor, 0, 0);
#else
    bool matched = re->pattern->Match(haystack, startpos, anchor, 0, 0);
#endif
    return BOOL2RUBY(matched);
  }

  /* One extra slot because Match also returns the whole match. */
  const int slots = submatches + 1;

  VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
  TypedData_Get_Struct(matchdata, re2_matchdata, &re2_matchdata_data_type, md);
  md->matches = new(std::nothrow) re2::StringPiece[slots];
  RB_OBJ_WRITE(matchdata, &md->regexp, self);

  /* The MatchData keeps a frozen string; duplicate first if the caller's
   * text is not already frozen. */
  if (!RTEST(rb_obj_frozen_p(text))) {
    text = rb_str_freeze(rb_str_dup(text));
  }
  RB_OBJ_WRITE(matchdata, &md->text, text);

  if (md->matches == 0) {
    rb_raise(rb_eNoMemError,
             "not enough memory to allocate StringPieces for matches");
  }

  md->number_of_matches = slots;

  /* Match against the string stored on the MatchData, not the original
   * argument. */
  const re2::StringPiece subject(RSTRING_PTR(md->text), RSTRING_LEN(md->text));
#ifdef HAVE_ENDPOS_ARGUMENT
  bool matched = re->pattern->Match(subject, startpos, endpos, anchor,
                                    md->matches, slots);
#else
  bool matched = re->pattern->Match(subject, startpos, anchor,
                                    md->matches, slots);
#endif

  return matched ? matchdata : Qnil;
}
|
1565
|
+
|
1566
|
+
/*
|
1567
|
+
* Returns true if the pattern matches any substring of the given text using
|
1568
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L413-L427
|
1569
|
+
* `PartialMatch`}.
|
1570
|
+
*
|
1571
|
+
* @return [Boolean] whether the match was successful
|
1572
|
+
* @raise [TypeError] if text cannot be coerced to a `String`
|
1573
|
+
*/
|
1574
|
+
static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
  re2_pattern *re;

  /* Coerce the argument to a String before touching its bytes. */
  StringValue(text);

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, re);

  const re2::StringPiece haystack(RSTRING_PTR(text), RSTRING_LEN(text));
  const bool found = RE2::PartialMatch(haystack, *re->pattern);

  return found ? Qtrue : Qfalse;
}
|
1585
|
+
|
1586
|
+
/*
|
1587
|
+
* Returns true if the pattern matches the given text using
|
1588
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L376-L411
|
1589
|
+
* `FullMatch`}.
|
1590
|
+
*
|
1591
|
+
* @return [Boolean] whether the match was successful
|
1592
|
+
* @raise [TypeError] if text cannot be coerced to a `String`
|
1593
|
+
*/
|
1594
|
+
static VALUE re2_regexp_full_match_p(const VALUE self, VALUE text) {
  /* Coerce the argument first so that a non-String is rejected before the
   * wrapped pattern is touched.
   */
  StringValue(text);

  re2_pattern *wrapped;
  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, wrapped);

  /* View the Ruby string's bytes in place rather than copying them. */
  const re2::StringPiece whole_text(RSTRING_PTR(text), RSTRING_LEN(text));
  const bool matched = RE2::FullMatch(whole_text, *wrapped->pattern);

  return BOOL2RUBY(matched);
}
|
1605
|
+
|
1606
|
+
/*
|
1607
|
+
* Returns a {RE2::Scanner} for scanning the given text incrementally with
|
1608
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L447-L463
|
1609
|
+
* `FindAndConsume`}.
|
1610
|
+
*
|
1611
|
+
* @param [String] text the text to scan incrementally
|
1612
|
+
* @return [RE2::Scanner] an `Enumerable` {RE2::Scanner} object
|
1613
|
+
* @raise [TypeError] if `text` cannot be coerced to a `String`
|
1614
|
+
* @example
|
1615
|
+
* c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
|
1616
|
+
* #=> #<RE2::Scanner:0x0000000000000001>
|
1617
|
+
*/
|
1618
|
+
static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
  /* Ensure text is a string. */
  StringValue(text);

  re2_pattern *p;
  re2_scanner *c;

  TypedData_Get_Struct(self, re2_pattern, &re2_regexp_data_type, p);
  VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
  TypedData_Get_Struct(scanner, re2_scanner, &re2_scanner_data_type, c);

  /* The StringPiece only views the bytes owned by `text`; the scanner keeps
   * a reference to `text` below so that the buffer stays reachable.
   */
  c->input = new(std::nothrow) re2::StringPiece(
      RSTRING_PTR(text), RSTRING_LEN(text));

  /* new(std::nothrow) reports failure by returning a null pointer, so check
   * it here instead of handing back a scanner with no input.
   */
  if (c->input == 0) {
    rb_raise(rb_eNoMemError,
             "not enough memory to allocate StringPiece for input");
  }

  /* The scanner type is write-barrier protected, so VALUE fields must be
   * stored with RB_OBJ_WRITE.
   */
  RB_OBJ_WRITE(scanner, &c->regexp, self);
  RB_OBJ_WRITE(scanner, &c->text, text);

  /* An invalid pattern is treated as having no capturing groups. */
  if (p->pattern->ok()) {
    c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
  } else {
    c->number_of_capturing_groups = 0;
  }

  c->eof = false;

  return scanner;
}
|
1644
|
+
|
1645
|
+
/*
|
1646
|
+
* Returns whether the underlying RE2 version supports passing an `endpos`
|
1647
|
+
* argument to
|
1648
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
|
1649
|
+
* Match}. If not, {RE2::Regexp#match} will raise an error if attempting to
|
1650
|
+
* pass an `endpos`.
|
1651
|
+
*
|
1652
|
+
* @return [Boolean] whether the underlying
|
1653
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L562-L588
|
1654
|
+
* Match} has an endpos argument
|
1655
|
+
*/
|
1656
|
+
static VALUE re2_regexp_match_has_endpos_argument_p(VALUE) {
  /* The answer is fixed at build time by whether HAVE_ENDPOS_ARGUMENT was
   * defined when this extension was compiled.
   */
#ifdef HAVE_ENDPOS_ARGUMENT
  const bool has_endpos = true;
#else
  const bool has_endpos = false;
#endif

  return BOOL2RUBY(has_endpos);
}
|
1663
|
+
|
1664
|
+
/*
|
1665
|
+
* Returns a copy of `str` with the first occurrence of `pattern` replaced with
|
1666
|
+
* `rewrite` using
|
1667
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L465-L480
|
1668
|
+
* `Replace`}.
|
1669
|
+
*
|
1670
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1671
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1672
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1673
|
+
*
|
1674
|
+
* @param [String] str the string to modify
|
1675
|
+
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1676
|
+
* @param [String] rewrite the string to replace with
|
1677
|
+
* @return [String] the resulting string
|
1678
|
+
* @raise [TypeError] if the given rewrite or pattern (if not provided as a
|
1679
|
+
* {RE2::Regexp}) cannot be coerced to `String`s
|
1680
|
+
* @example
|
1681
|
+
* RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
|
1682
|
+
* re2 = RE2::Regexp.new("hel+o")
|
1683
|
+
* RE2.Replace("hello there", re2, "yo") #=> "yo there"
|
1684
|
+
*/
|
1685
|
+
static VALUE re2_Replace(VALUE, VALUE str, VALUE pattern,
    VALUE rewrite) {
  /* Coerce every argument *before* any C++ object with a destructor exists.
   * StringValue raises TypeError via longjmp, which would skip the
   * destructor of the std::string copy below and leak its buffer.
   */
  StringValue(rewrite);
  StringValue(str);

  re2_pattern *p = 0;
  const bool pattern_is_regexp =
      RTEST(rb_obj_is_kind_of(pattern, re2_cRegexp));

  if (pattern_is_regexp) {
    TypedData_Get_Struct(pattern, re2_pattern, &re2_regexp_data_type, p);
  } else {
    /* Ensure pattern is a string. */
    StringValue(pattern);
  }

  /* Take a copy of str so it can be modified in-place by
   * RE2::Replace.
   */
  std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
  const re2::StringPiece rewrite_piece(
      RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));

  /* Do the replacement. */
  if (pattern_is_regexp) {
    RE2::Replace(&str_as_string, *p->pattern, rewrite_piece);

    /* The result takes the encoding the RE2::Regexp was compiled with. */
    return encoded_str_new(str_as_string.data(), str_as_string.size(),
        p->pattern->options().encoding());
  }

  /* A String pattern is compiled on the fly, so the result is tagged with
   * the UTF-8 encoding.
   */
  RE2::Replace(&str_as_string,
      re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
      rewrite_piece);

  return encoded_str_new(str_as_string.data(), str_as_string.size(),
      RE2::Options::EncodingUTF8);
}
|
1717
|
+
|
1718
|
+
/*
|
1719
|
+
* Return a copy of `str` with `pattern` replaced by `rewrite` using
|
1720
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L482-L497
|
1721
|
+
* `GlobalReplace`}.
|
1722
|
+
*
|
1723
|
+
* Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be
|
1724
|
+
* returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the
|
1725
|
+
* {RE2::Regexp} is set to `false` (any other encoding's behaviour is undefined).
|
1726
|
+
*
|
1727
|
+
* @param [String] str the string to modify
|
1728
|
+
* @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
|
1729
|
+
* @param [String] rewrite the string to replace with
|
1730
|
+
* @raise [TypeError] if the given rewrite or pattern (if not provided as a
|
1731
|
+
* {RE2::Regexp}) cannot be coerced to `String`s
|
1732
|
+
* @return [String] the resulting string
|
1733
|
+
* @example
|
1734
|
+
* re2 = RE2::Regexp.new("oo?")
|
1735
|
+
* RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
|
1736
|
+
* RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
|
1737
|
+
*/
|
1738
|
+
static VALUE re2_GlobalReplace(VALUE, VALUE str, VALUE pattern,
    VALUE rewrite) {
  /* Coerce every argument *before* any C++ object with a destructor exists.
   * StringValue raises TypeError via longjmp, which would skip the
   * destructor of the std::string copy below and leak its buffer.
   */
  StringValue(rewrite);
  StringValue(str);

  re2_pattern *p = 0;
  const bool pattern_is_regexp =
      RTEST(rb_obj_is_kind_of(pattern, re2_cRegexp));

  if (pattern_is_regexp) {
    TypedData_Get_Struct(pattern, re2_pattern, &re2_regexp_data_type, p);
  } else {
    /* Ensure pattern is a string. */
    StringValue(pattern);
  }

  /* Take a copy of str so it can be modified in-place by
   * RE2::GlobalReplace.
   */
  std::string str_as_string(RSTRING_PTR(str), RSTRING_LEN(str));
  const re2::StringPiece rewrite_piece(
      RSTRING_PTR(rewrite), RSTRING_LEN(rewrite));

  /* Do the replacement. */
  if (pattern_is_regexp) {
    RE2::GlobalReplace(&str_as_string, *p->pattern, rewrite_piece);

    /* The result takes the encoding the RE2::Regexp was compiled with. */
    return encoded_str_new(str_as_string.data(), str_as_string.size(),
        p->pattern->options().encoding());
  }

  /* A String pattern is compiled on the fly, so the result is tagged with
   * the UTF-8 encoding.
   */
  RE2::GlobalReplace(&str_as_string,
      re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
      rewrite_piece);

  return encoded_str_new(str_as_string.data(), str_as_string.size(),
      RE2::Options::EncodingUTF8);
}
|
1769
|
+
|
1770
|
+
/*
|
1771
|
+
* Returns a version of `str` with all potentially meaningful regexp characters
|
1772
|
+
* escaped using
|
1773
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/re2.h#L512-L518
|
1774
|
+
* `QuoteMeta`}. The returned string, used as a regular expression, will
|
1775
|
+
* exactly match the original string.
|
1776
|
+
*
|
1777
|
+
* @param [String] unquoted the unquoted string
|
1778
|
+
* @raise [TypeError] if the given unquoted string cannot be coerced to a `String`
|
1779
|
+
* @return [String] the escaped string
|
1780
|
+
* @example
|
1781
|
+
* RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
|
1782
|
+
*/
|
1783
|
+
static VALUE re2_QuoteMeta(VALUE, VALUE unquoted) {
  /* Raises TypeError unless the argument can be coerced to a String. */
  StringValue(unquoted);

  const re2::StringPiece raw(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted));
  const std::string escaped = RE2::QuoteMeta(raw);

  /* Copy the escaped bytes into a new Ruby string. */
  return rb_str_new(escaped.data(), escaped.size());
}
|
1791
|
+
|
1792
|
+
static void re2_set_free(void *ptr) {
|
1793
|
+
re2_set *s = reinterpret_cast<re2_set *>(ptr);
|
1794
|
+
if (s->set) {
|
1795
|
+
delete s->set;
|
1796
|
+
}
|
1797
|
+
xfree(s);
|
1798
|
+
}
|
1799
|
+
|
1800
|
+
static size_t re2_set_memsize(const void *ptr) {
|
1801
|
+
const re2_set *s = reinterpret_cast<const re2_set *>(ptr);
|
1802
|
+
size_t size = sizeof(*s);
|
1803
|
+
if (s->set) {
|
1804
|
+
size += sizeof(*s->set);
|
1805
|
+
}
|
1806
|
+
|
1807
|
+
return size;
|
1808
|
+
}
|
1809
|
+
|
1810
|
+
/* Typed-data descriptor that tells Ruby how to manage the re2_set struct
 * wrapped by RE2::Set objects.
 */
static const rb_data_type_t re2_set_data_type = {
  "RE2::Set",
  {
    /* No mark function is registered. */
    0,
    re2_set_free,
    re2_set_memsize,
  },
  /* NOTE(review): per Ruby's rb_data_type_t layout these two zeroes are the
   * parent type and the user data pointer -- confirm against the Ruby
   * version being targeted.
   */
  0,
  0,
  // IMPORTANT: WB_PROTECTED objects must only use the RB_OBJ_WRITE()
  // macro to update VALUE references, as to trigger write barriers.
  RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
};
|
1823
|
+
|
1824
|
+
static VALUE re2_set_allocate(VALUE klass) {
  /* Receives the pointer to the freshly allocated struct; nothing is stored
   * in it here.
   */
  re2_set *wrapper;

  return TypedData_Make_Struct(klass, re2_set, &re2_set_data_type, wrapper);
}
|
1830
|
+
|
1831
|
+
/*
|
1832
|
+
* Returns a new {RE2::Set} object, a collection of patterns that can be
|
1833
|
+
* searched for simultaneously.
|
1834
|
+
*
|
1835
|
+
* @return [RE2::Set]
|
1836
|
+
*
|
1837
|
+
* @overload initialize
|
1838
|
+
* Returns a new {RE2::Set} object for unanchored patterns with the default
|
1839
|
+
* options.
|
1840
|
+
*
|
1841
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1842
|
+
* @return [RE2::Set]
|
1843
|
+
*
|
1844
|
+
* @overload initialize(anchor)
|
1845
|
+
* Returns a new {RE2::Set} object for the specified anchor with the default
|
1846
|
+
* options.
|
1847
|
+
*
|
1848
|
+
* @param [Symbol] anchor one of `:unanchored`, `:anchor_start`, `:anchor_both`
|
1849
|
+
* @raise [ArgumentError] if anchor is not `:unanchored`, `:anchor_start` or `:anchor_both`
|
1850
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1851
|
+
*
|
1852
|
+
* @overload initialize(anchor, options)
|
1853
|
+
* Returns a new {RE2::Set} object with the specified options.
|
1854
|
+
*
|
1855
|
+
* @param [Symbol] anchor one of `:unanchored`, `:anchor_start`, `:anchor_both`
|
1856
|
+
* @param [Hash] options the options with which to compile the pattern
|
1857
|
+
* @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
|
1858
|
+
* @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
|
1859
|
+
* @option options [Boolean] :longest_match (false) search for longest match, not first match
|
1860
|
+
* @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
|
1861
|
+
* @option options [Integer] :max_mem approx. max memory footprint of RE2
|
1862
|
+
* @option options [Boolean] :literal (false) interpret string as literal, not regexp
|
1863
|
+
* @option options [Boolean] :never_nl (false) never match `\n`, even if it is in regexp
|
1864
|
+
* @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with `(?i)` unless in `posix_syntax` mode)
|
1865
|
+
* @option options [Boolean] :perl_classes (false) allow Perl's `\d` `\s` `\w` `\D` `\S` `\W` when in `posix_syntax` mode
|
1866
|
+
* @option options [Boolean] :word_boundary (false) allow `\b` `\B` (word boundary and not) when in `posix_syntax` mode
|
1867
|
+
* @option options [Boolean] :one_line (false) `^` and `$` only match beginning and end of text when in `posix_syntax` mode
|
1868
|
+
* @return [RE2::Set] a {RE2::Set} with the specified anchor and options
|
1869
|
+
* @raise [ArgumentError] if `anchor` is not one of the accepted choices
|
1870
|
+
* @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
|
1871
|
+
*/
|
1872
|
+
static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
  VALUE anchor, options;
  re2_set *s;

  /* Both arguments are optional: initialize([anchor [, options]]). */
  rb_scan_args(argc, argv, "02", &anchor, &options);
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, s);

  /* An omitted (nil) anchor means unanchored matching. */
  RE2::Anchor re2_anchor = RE2::UNANCHORED;

  if (!NIL_P(anchor)) {
    Check_Type(anchor, T_SYMBOL);
    ID id_anchor_arg = SYM2ID(anchor);
    if (id_anchor_arg == id_unanchored) {
      re2_anchor = RE2::UNANCHORED;
    } else if (id_anchor_arg == id_anchor_start) {
      re2_anchor = RE2::ANCHOR_START;
    } else if (id_anchor_arg == id_anchor_both) {
      re2_anchor = RE2::ANCHOR_BOTH;
    } else {
      rb_raise(rb_eArgError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both");
    }
  }

  RE2::Options re2_options;

  if (RTEST(options)) {
    parse_re2_options(&re2_options, options);
  }

  /* Build the new set first so that an allocation failure leaves any
   * existing set untouched.
   */
  RE2::Set *replacement = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
  if (replacement == 0) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
  }

  /* If #initialize is invoked again on the same object, release the
   * previous set instead of leaking it.
   * NOTE(review): assumes s->set is null before the first #initialize, as
   * re2_set_free's null check suggests -- confirm.
   */
  delete s->set;
  s->set = replacement;

  return self;
}
|
1908
|
+
|
1909
|
+
/*
|
1910
|
+
* Adds a pattern to the set. Returns the index that will identify the pattern
|
1911
|
+
* in the output of {RE2::Set#match}. Cannot be called after {RE2::Set#compile}
|
1912
|
+
* has been called.
|
1913
|
+
*
|
1914
|
+
* @param [String] pattern the regex pattern
|
1915
|
+
* @return [Integer] the index of the pattern in the set
|
1916
|
+
* @raise [ArgumentError] if called after compile or the pattern is rejected
|
1917
|
+
* @example
|
1918
|
+
* set = RE2::Set.new
|
1919
|
+
* set.add("abc") #=> 0
|
1920
|
+
* set.add("def") #=> 1
|
1921
|
+
*/
|
1922
|
+
static VALUE re2_set_add(VALUE self, VALUE pattern) {
  StringValue(pattern);

  re2_set *wrapper;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, wrapper);

  /* rb_raise does not return, so a std::string still alive at that point
   * would never be destroyed. Copy the error text into a fixed-size stack
   * buffer and confine the std::string to an inner scope that has ended
   * before any raise.
   */
  char reason[100];
  int position;

  {
    std::string add_error;
    const re2::StringPiece source(
        RSTRING_PTR(pattern), RSTRING_LEN(pattern));

    position = wrapper->set->Add(source, &add_error);
    strlcpy(reason, add_error.c_str(), sizeof(reason));
  }

  /* A negative index is RE2's signal that the pattern was not added. */
  if (position < 0) {
    rb_raise(rb_eArgError, "str rejected by RE2::Set->Add(): %s", reason);
  }

  return INT2FIX(position);
}
|
1947
|
+
|
1948
|
+
/*
|
1949
|
+
* Compiles a {RE2::Set} so it can be used to match against. Must be called
|
1950
|
+
* after {RE2::Set#add} and before {RE2::Set#match}.
|
1951
|
+
*
|
1952
|
+
* @return [Boolean] whether compilation was a success
|
1953
|
+
* @example
|
1954
|
+
* set = RE2::Set.new
|
1955
|
+
* set.add("abc")
|
1956
|
+
* set.compile #=> true
|
1957
|
+
*/
|
1958
|
+
static VALUE re2_set_compile(VALUE self) {
  re2_set *wrapper;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, wrapper);

  /* Hand RE2's success flag straight back as a Ruby boolean. */
  const bool compiled = wrapper->set->Compile();

  return BOOL2RUBY(compiled);
}
|
1964
|
+
|
1965
|
+
/*
|
1966
|
+
* Returns whether the underlying RE2 version outputs error information from
|
1967
|
+
* {https://github.com/google/re2/blob/bc0faab533e2b27b85b8ad312abf061e33ed6b5d/re2/set.h#L62-L65
|
1968
|
+
* `RE2::Set::Match`}. If not, {RE2::Set#match} will raise an error if attempting to set
|
1969
|
+
* its `:exception` option to `true`.
|
1970
|
+
*
|
1971
|
+
* @return [Boolean] whether the underlying RE2 outputs error information from {RE2::Set} matches
|
1972
|
+
*/
|
1973
|
+
static VALUE re2_set_match_raises_errors_p(VALUE) {
  /* The answer is fixed at build time by whether HAVE_ERROR_INFO_ARGUMENT
   * was defined when this extension was compiled.
   */
#ifdef HAVE_ERROR_INFO_ARGUMENT
  const bool reports_errors = true;
#else
  const bool reports_errors = false;
#endif

  return BOOL2RUBY(reports_errors);
}
|
1980
|
+
|
1981
|
+
/*
|
1982
|
+
* Matches the given text against patterns in the set, returning an array of
|
1983
|
+
* integer indices of the matching patterns if matched or an empty array if
|
1984
|
+
* there are no matches.
|
1985
|
+
*
|
1986
|
+
* @return [Array<Integer>]
|
1987
|
+
*
|
1988
|
+
* @overload match(str)
|
1989
|
+
* Returns an array of integer indices of patterns matching the given string
|
1990
|
+
* (if any). Raises exceptions if there are any errors while matching.
|
1991
|
+
*
|
1992
|
+
* @param [String] str the text to match against
|
1993
|
+
* @return [Array<Integer>] the indices of matching regexps
|
1994
|
+
* @raise [MatchError] if an error occurs while matching
|
1995
|
+
* @raise [UnsupportedError] if the underlying version of RE2 does not output error information
|
1996
|
+
* @example
|
1997
|
+
* set = RE2::Set.new
|
1998
|
+
* set.add("abc")
|
1999
|
+
* set.add("def")
|
2000
|
+
* set.compile
|
2001
|
+
* set.match("abcdef") #=> [0, 1]
|
2002
|
+
*
|
2003
|
+
* @overload match(str, options)
|
2004
|
+
* Returns an array of integer indices of patterns matching the given string
|
2005
|
+
* (if any). Raises exceptions if there are any errors while matching and the
|
2006
|
+
* `:exception` option is set to true.
|
2007
|
+
*
|
2008
|
+
* @param [String] str the text to match against
|
2009
|
+
* @param [Hash] options the options with which to match
|
2010
|
+
* @option options [Boolean] :exception (true) whether to raise exceptions with RE2's error information (not supported on ABI version 0 of RE2)
|
2011
|
+
* @return [Array<Integer>] the indices of matching regexps
|
2012
|
+
* @raise [MatchError] if an error occurs while matching
|
2013
|
+
* @raise [UnsupportedError] if the underlying version of RE2 does not output error information
|
2014
|
+
* @example
|
2015
|
+
* set = RE2::Set.new
|
2016
|
+
* set.add("abc")
|
2017
|
+
* set.add("def")
|
2018
|
+
* set.compile
|
2019
|
+
* set.match("abcdef", exception: true) #=> [0, 1]
|
2020
|
+
*/
|
2021
|
+
static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
  VALUE str, options;
  rb_scan_args(argc, argv, "11", &str, &options);

  StringValue(str);

  re2_set *wrapper;
  TypedData_Get_Struct(self, re2_set, &re2_set_data_type, wrapper);

  /* Errors are raised unless the caller passes a non-nil, falsy
   * :exception option.
   */
  bool raise_exception = true;

  if (RTEST(options)) {
    Check_Type(options, T_HASH);

    const VALUE requested = rb_hash_aref(options, ID2SYM(id_exception));
    if (!NIL_P(requested)) {
      raise_exception = RTEST(requested);
    }
  }

  std::vector<int> indices;

  /* Quiet mode: a failed match simply yields an empty array. */
  if (!raise_exception) {
    const bool matched = wrapper->set->Match(
        re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str)), &indices);
    VALUE result = rb_ary_new2(indices.size());

    if (matched) {
      for (std::vector<int>::const_iterator it = indices.begin();
           it != indices.end(); ++it) {
        rb_ary_push(result, INT2FIX(*it));
      }
    }

    return result;
  }

#ifdef HAVE_ERROR_INFO_ARGUMENT
  RE2::Set::ErrorInfo info;
  const bool matched = wrapper->set->Match(
      re2::StringPiece(RSTRING_PTR(str), RSTRING_LEN(str)), &indices, &info);
  VALUE result = rb_ary_new2(indices.size());

  if (matched) {
    for (std::vector<int>::const_iterator it = indices.begin();
         it != indices.end(); ++it) {
      rb_ary_push(result, INT2FIX(*it));
    }

    return result;
  }

  /* Match() returned false: either nothing matched (kNoError) or RE2
   * reported a real failure, which is surfaced as RE2::Set::MatchError.
   */
  switch (info.kind) {
    case RE2::Set::kNoError:
      break;
    case RE2::Set::kNotCompiled:
      rb_raise(re2_eSetMatchError, "#match must not be called before #compile");
    case RE2::Set::kOutOfMemory:
      rb_raise(re2_eSetMatchError, "The DFA ran out of memory");
    case RE2::Set::kInconsistent:
      rb_raise(re2_eSetMatchError, "RE2::Prog internal error");
    default:  // Just in case a future version of libre2 adds new ErrorKinds
      rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", info.kind);
  }

  return result;
#else
  rb_raise(re2_eSetUnsupportedError, "current version of RE2::Set::Match() does not output error information, :exception option can only be set to false");
#endif
}
|
2085
|
+
|
2086
|
+
extern "C" void Init_re2(void) {
|
2087
|
+
re2_mRE2 = rb_define_module("RE2");
|
2088
|
+
re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
|
2089
|
+
re2_eRegexpUnsupportedError = rb_define_class_under(re2_cRegexp,
|
2090
|
+
"UnsupportedError", rb_const_get(rb_cObject, rb_intern("StandardError")));
|
2091
|
+
re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
|
2092
|
+
re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
|
2093
|
+
re2_cSet = rb_define_class_under(re2_mRE2, "Set", rb_cObject);
|
2094
|
+
re2_eSetMatchError = rb_define_class_under(re2_cSet, "MatchError",
|
2095
|
+
rb_const_get(rb_cObject, rb_intern("StandardError")));
|
2096
|
+
re2_eSetUnsupportedError = rb_define_class_under(re2_cSet, "UnsupportedError",
|
2097
|
+
rb_const_get(rb_cObject, rb_intern("StandardError")));
|
2098
|
+
|
2099
|
+
rb_define_alloc_func(re2_cRegexp,
|
2100
|
+
reinterpret_cast<VALUE (*)(VALUE)>(re2_regexp_allocate));
|
2101
|
+
rb_define_alloc_func(re2_cMatchData,
|
2102
|
+
reinterpret_cast<VALUE (*)(VALUE)>(re2_matchdata_allocate));
|
2103
|
+
rb_define_alloc_func(re2_cScanner,
|
2104
|
+
reinterpret_cast<VALUE (*)(VALUE)>(re2_scanner_allocate));
|
2105
|
+
rb_define_alloc_func(re2_cSet,
|
2106
|
+
reinterpret_cast<VALUE (*)(VALUE)>(re2_set_allocate));
|
2107
|
+
|
2108
|
+
rb_define_method(re2_cMatchData, "string",
|
2109
|
+
RUBY_METHOD_FUNC(re2_matchdata_string), 0);
|
2110
|
+
rb_define_method(re2_cMatchData, "regexp",
|
2111
|
+
RUBY_METHOD_FUNC(re2_matchdata_regexp), 0);
|
2112
|
+
rb_define_method(re2_cMatchData, "to_a",
|
2113
|
+
RUBY_METHOD_FUNC(re2_matchdata_to_a), 0);
|
2114
|
+
rb_define_method(re2_cMatchData, "size",
|
2115
|
+
RUBY_METHOD_FUNC(re2_matchdata_size), 0);
|
2116
|
+
rb_define_method(re2_cMatchData, "length",
|
2117
|
+
RUBY_METHOD_FUNC(re2_matchdata_size), 0);
|
2118
|
+
rb_define_method(re2_cMatchData, "begin",
|
2119
|
+
RUBY_METHOD_FUNC(re2_matchdata_begin), 1);
|
2120
|
+
rb_define_method(re2_cMatchData, "end",
|
2121
|
+
RUBY_METHOD_FUNC(re2_matchdata_end), 1);
|
2122
|
+
rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
|
2123
|
+
-1);
|
2124
|
+
rb_define_method(re2_cMatchData, "to_s",
|
2125
|
+
RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
|
2126
|
+
rb_define_method(re2_cMatchData, "inspect",
|
2127
|
+
RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
|
2128
|
+
rb_define_method(re2_cMatchData, "deconstruct",
|
2129
|
+
RUBY_METHOD_FUNC(re2_matchdata_deconstruct), 0);
|
2130
|
+
rb_define_method(re2_cMatchData, "deconstruct_keys",
|
2131
|
+
RUBY_METHOD_FUNC(re2_matchdata_deconstruct_keys), 1);
|
2132
|
+
|
2133
|
+
rb_define_method(re2_cScanner, "string",
|
2134
|
+
RUBY_METHOD_FUNC(re2_scanner_string), 0);
|
2135
|
+
rb_define_method(re2_cScanner, "eof?",
|
2136
|
+
RUBY_METHOD_FUNC(re2_scanner_eof), 0);
|
2137
|
+
rb_define_method(re2_cScanner, "regexp",
|
2138
|
+
RUBY_METHOD_FUNC(re2_scanner_regexp), 0);
|
2139
|
+
rb_define_method(re2_cScanner, "scan",
|
2140
|
+
RUBY_METHOD_FUNC(re2_scanner_scan), 0);
|
2141
|
+
rb_define_method(re2_cScanner, "rewind",
|
2142
|
+
RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
|
2143
|
+
|
2144
|
+
rb_define_singleton_method(re2_cRegexp, "match_has_endpos_argument?",
|
2145
|
+
RUBY_METHOD_FUNC(re2_regexp_match_has_endpos_argument_p), 0);
|
2146
|
+
rb_define_method(re2_cRegexp, "initialize",
|
2147
|
+
RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
|
2148
|
+
rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
|
2149
|
+
rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
|
2150
|
+
0);
|
2151
|
+
rb_define_method(re2_cRegexp, "error_arg",
|
2152
|
+
RUBY_METHOD_FUNC(re2_regexp_error_arg), 0);
|
2153
|
+
rb_define_method(re2_cRegexp, "program_size",
|
2154
|
+
RUBY_METHOD_FUNC(re2_regexp_program_size), 0);
|
2155
|
+
rb_define_method(re2_cRegexp, "options",
|
2156
|
+
RUBY_METHOD_FUNC(re2_regexp_options), 0);
|
2157
|
+
rb_define_method(re2_cRegexp, "number_of_capturing_groups",
|
2158
|
+
RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
|
2159
|
+
rb_define_method(re2_cRegexp, "named_capturing_groups",
|
2160
|
+
RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
|
2161
|
+
rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
|
2162
|
+
-1);
|
2163
|
+
rb_define_method(re2_cRegexp, "match?", RUBY_METHOD_FUNC(re2_regexp_match_p),
|
2164
|
+
1);
|
2165
|
+
rb_define_method(re2_cRegexp, "partial_match?",
|
2166
|
+
RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
2167
|
+
rb_define_method(re2_cRegexp, "=~", RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
2168
|
+
rb_define_method(re2_cRegexp, "===", RUBY_METHOD_FUNC(re2_regexp_match_p), 1);
|
2169
|
+
rb_define_method(re2_cRegexp, "full_match?",
|
2170
|
+
RUBY_METHOD_FUNC(re2_regexp_full_match_p), 1);
|
2171
|
+
rb_define_method(re2_cRegexp, "scan",
|
2172
|
+
RUBY_METHOD_FUNC(re2_regexp_scan), 1);
|
2173
|
+
rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
|
2174
|
+
rb_define_method(re2_cRegexp, "to_str", RUBY_METHOD_FUNC(re2_regexp_to_s),
|
2175
|
+
0);
|
2176
|
+
rb_define_method(re2_cRegexp, "pattern", RUBY_METHOD_FUNC(re2_regexp_to_s),
|
2177
|
+
0);
|
2178
|
+
rb_define_method(re2_cRegexp, "source", RUBY_METHOD_FUNC(re2_regexp_to_s),
|
2179
|
+
0);
|
2180
|
+
rb_define_method(re2_cRegexp, "inspect",
|
2181
|
+
RUBY_METHOD_FUNC(re2_regexp_inspect), 0);
|
2182
|
+
rb_define_method(re2_cRegexp, "utf8?", RUBY_METHOD_FUNC(re2_regexp_utf8),
|
2183
|
+
0);
|
2184
|
+
rb_define_method(re2_cRegexp, "posix_syntax?",
|
2185
|
+
RUBY_METHOD_FUNC(re2_regexp_posix_syntax), 0);
|
2186
|
+
rb_define_method(re2_cRegexp, "longest_match?",
|
2187
|
+
RUBY_METHOD_FUNC(re2_regexp_longest_match), 0);
|
2188
|
+
rb_define_method(re2_cRegexp, "log_errors?",
|
2189
|
+
RUBY_METHOD_FUNC(re2_regexp_log_errors), 0);
|
2190
|
+
rb_define_method(re2_cRegexp, "max_mem",
|
2191
|
+
RUBY_METHOD_FUNC(re2_regexp_max_mem), 0);
|
2192
|
+
rb_define_method(re2_cRegexp, "literal?",
|
2193
|
+
RUBY_METHOD_FUNC(re2_regexp_literal), 0);
|
2194
|
+
rb_define_method(re2_cRegexp, "never_nl?",
|
2195
|
+
RUBY_METHOD_FUNC(re2_regexp_never_nl), 0);
|
2196
|
+
rb_define_method(re2_cRegexp, "case_sensitive?",
|
2197
|
+
RUBY_METHOD_FUNC(re2_regexp_case_sensitive), 0);
|
2198
|
+
rb_define_method(re2_cRegexp, "case_insensitive?",
|
2199
|
+
RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
|
2200
|
+
rb_define_method(re2_cRegexp, "casefold?",
|
2201
|
+
RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
|
2202
|
+
rb_define_method(re2_cRegexp, "perl_classes?",
|
2203
|
+
RUBY_METHOD_FUNC(re2_regexp_perl_classes), 0);
|
2204
|
+
rb_define_method(re2_cRegexp, "word_boundary?",
|
2205
|
+
RUBY_METHOD_FUNC(re2_regexp_word_boundary), 0);
|
2206
|
+
rb_define_method(re2_cRegexp, "one_line?",
|
2207
|
+
RUBY_METHOD_FUNC(re2_regexp_one_line), 0);
|
2208
|
+
|
2209
|
+
rb_define_singleton_method(re2_cSet, "match_raises_errors?",
|
2210
|
+
RUBY_METHOD_FUNC(re2_set_match_raises_errors_p), 0);
|
2211
|
+
rb_define_method(re2_cSet, "initialize",
|
2212
|
+
RUBY_METHOD_FUNC(re2_set_initialize), -1);
|
2213
|
+
rb_define_method(re2_cSet, "add", RUBY_METHOD_FUNC(re2_set_add), 1);
|
2214
|
+
rb_define_method(re2_cSet, "compile", RUBY_METHOD_FUNC(re2_set_compile), 0);
|
2215
|
+
rb_define_method(re2_cSet, "match", RUBY_METHOD_FUNC(re2_set_match), -1);
|
2216
|
+
|
2217
|
+
rb_define_module_function(re2_mRE2, "Replace",
|
2218
|
+
RUBY_METHOD_FUNC(re2_Replace), 3);
|
2219
|
+
rb_define_module_function(re2_mRE2, "GlobalReplace",
|
2220
|
+
RUBY_METHOD_FUNC(re2_GlobalReplace), 3);
|
2221
|
+
rb_define_module_function(re2_mRE2, "QuoteMeta",
|
2222
|
+
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
2223
|
+
rb_define_singleton_method(re2_cRegexp, "escape",
|
2224
|
+
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
2225
|
+
rb_define_singleton_method(re2_cRegexp, "quote",
|
2226
|
+
RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
|
2227
|
+
|
2228
|
+
// (see RE2::Regexp#initialize)
|
2229
|
+
rb_define_singleton_method(re2_cRegexp, "compile",
|
2230
|
+
RUBY_METHOD_FUNC(rb_class_new_instance), -1);
|
2231
|
+
|
2232
|
+
rb_define_module_function(rb_mKernel, "RE2", RUBY_METHOD_FUNC(re2_re2), -1);
|
2233
|
+
|
2234
|
+
/* Create the symbols used in options. */
|
2235
|
+
id_utf8 = rb_intern("utf8");
|
2236
|
+
id_posix_syntax = rb_intern("posix_syntax");
|
2237
|
+
id_longest_match = rb_intern("longest_match");
|
2238
|
+
id_log_errors = rb_intern("log_errors");
|
2239
|
+
id_max_mem = rb_intern("max_mem");
|
2240
|
+
id_literal = rb_intern("literal");
|
2241
|
+
id_never_nl = rb_intern("never_nl");
|
2242
|
+
id_case_sensitive = rb_intern("case_sensitive");
|
2243
|
+
id_perl_classes = rb_intern("perl_classes");
|
2244
|
+
id_word_boundary = rb_intern("word_boundary");
|
2245
|
+
id_one_line = rb_intern("one_line");
|
2246
|
+
id_unanchored = rb_intern("unanchored");
|
2247
|
+
id_anchor = rb_intern("anchor");
|
2248
|
+
id_anchor_start = rb_intern("anchor_start");
|
2249
|
+
id_anchor_both = rb_intern("anchor_both");
|
2250
|
+
id_exception = rb_intern("exception");
|
2251
|
+
id_submatches = rb_intern("submatches");
|
2252
|
+
id_startpos = rb_intern("startpos");
|
2253
|
+
id_endpos = rb_intern("endpos");
|
2254
|
+
}
|