re2 2.1.0-x86_64-darwin → 2.1.2-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea860967fe64f6452e07f85449e0437708d9e69a2867942c097f5ddf35e11aef
4
- data.tar.gz: e8c73abe9b730e38f8f4354fe56c97c6e01ce2940835c6f4dcccb974c9c2752a
3
+ metadata.gz: bedd6794a58aaf95ae2c194cf944c3c839ccaac6de661d99d07befff302cdaa3
4
+ data.tar.gz: def7f76a4e91abe08a6fc9593618a8621a9209b2861e838bcdafd04de527df72
5
5
  SHA512:
6
- metadata.gz: 6ce1b910c7e9e5017fb206ebe9713db7ca9c0cc2b218fdf69c6e78e31af95cadf75607be3011b45e52f93d0fb6c2f92fcb4797bc084bb252e5828edb1cf4db3e
7
- data.tar.gz: 542563bfd28615027f9e9ccb66e4998d9ec7ebf4bf13058c9d9d4c6e8005bf8dbcd2cc45a8b4dff3fa0288831e4cf61ca9020e646f17ade11124b54d6f07c84e
6
+ metadata.gz: 28d5223a1da4b82ad71b13d1042c338c8b1150538678bb4606475a936ad71d81b3536d9211d617dce1629e66a5c96a586aa9a91c4334297ba9ddcd4d11e4fd8a
7
+ data.tar.gz: 5cebd7aee0e2d0269e92e9b0e6e0547163acf54c6c19e35ec9e56b689f788c0b24abfdd1a3320de99cf4de55cd2ecc8e2d3e638b03691901d1b19b9393147a18
data/README.md CHANGED
@@ -5,7 +5,7 @@ Ruby bindings to [RE2][], a "fast, safe, thread-friendly alternative to
5
5
  backtracking regular expression engines like those used in PCRE, Perl, and
6
6
  Python".
7
7
 
8
- **Current version:** 2.1.0
8
+ **Current version:** 2.1.2
9
9
  **Supported Ruby versions:** 2.6, 2.7, 3.0, 3.1, 3.2
10
10
  **Bundled RE2 version:** libre2.11 (2023-09-01)
11
11
  **Supported RE2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01), libre2.10 (2022-12-01), libre2.11 (2023-07-01)
@@ -262,7 +262,7 @@ Contributions
262
262
  the deprecation and removal of the `utf8` encoding option in RE2;
263
263
  * Thanks to [Sergio Medina](https://github.com/serch) for reporting a bug when
264
264
  using `RE2::Scanner#scan` with an invalid regular expression;
265
- * Thanks to [Pritam Baral](https://github.com/pritambaral) for contributed the
265
+ * Thanks to [Pritam Baral](https://github.com/pritambaral) for contributing the
266
266
  initial support for `RE2::Set`.
267
267
 
268
268
  Contact
data/ext/re2/extconf.rb CHANGED
@@ -128,7 +128,6 @@ def build_extension(static_p = false)
128
128
 
129
129
  have_library("stdc++")
130
130
  have_header("stdint.h")
131
- have_func("rb_str_sublen")
132
131
 
133
132
  if !static_p and !have_library("re2")
134
133
  abort "You must have re2 installed and specified with --with-re2-dir, please see https://github.com/google/re2/wiki/Install"
data/ext/re2/re2.cc CHANGED
@@ -6,68 +6,21 @@
6
6
  * Released under the BSD Licence, please see LICENSE.txt
7
7
  */
8
8
 
9
- #include <ruby.h>
10
- #include <re2/re2.h>
11
- #include <re2/set.h>
12
9
  #include <stdint.h>
13
- #include <string>
10
+
11
+ #include <map>
14
12
  #include <sstream>
13
+ #include <string>
15
14
  #include <vector>
16
- using std::string;
17
- using std::ostringstream;
18
- using std::nothrow;
19
- using std::map;
20
- using std::vector;
15
+
16
+ #include <re2/re2.h>
17
+ #include <re2/set.h>
18
+ #include <ruby.h>
19
+ #include <ruby/encoding.h>
21
20
 
22
21
  #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
23
22
  #define UNUSED(x) ((void)x)
24
23
 
25
- #ifndef RSTRING_LEN
26
- #define RSTRING_LEN(x) (RSTRING(x)->len)
27
- #endif
28
-
29
- #ifndef RSTRING_PTR
30
- #define RSTRING_PTR(x) (RSTRING(x)->ptr)
31
- #endif
32
-
33
- #ifdef HAVE_RUBY_ENCODING_H
34
- #include <ruby/encoding.h>
35
- #define ENCODED_STR_NEW(str, length, encoding) \
36
- ({ \
37
- VALUE _string = rb_str_new(str, length); \
38
- int _enc = rb_enc_find_index(encoding); \
39
- rb_enc_associate_index(_string, _enc); \
40
- _string; \
41
- })
42
- #else
43
- #define ENCODED_STR_NEW(str, length, encoding) \
44
- rb_str_new((const char *)str, (long)length)
45
- #endif
46
-
47
- #ifdef HAVE_RB_STR_SUBLEN
48
- #define ENCODED_STR_SUBLEN(str, offset, encoding) \
49
- LONG2NUM(rb_str_sublen(str, offset))
50
- #else
51
- #ifdef HAVE_RUBY_ENCODING_H
52
- #define ENCODED_STR_SUBLEN(str, offset, encoding) \
53
- ({ \
54
- VALUE _string = ENCODED_STR_NEW(RSTRING_PTR(str), offset, encoding); \
55
- rb_str_length(_string); \
56
- })
57
- #else
58
- #define ENCODED_STR_SUBLEN(str, offset, encoding) \
59
- LONG2NUM(offset)
60
- #endif
61
- #endif
62
-
63
- #ifdef HAVE_ENDPOS_ARGUMENT
64
- #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
65
- (pattern->Match(text, startpos, endpos, anchor, match, nmatch))
66
- #else
67
- #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
68
- (pattern->Match(text, startpos, anchor, match, nmatch))
69
- #endif
70
-
71
24
  typedef struct {
72
25
  RE2 *pattern;
73
26
  } re2_pattern;
@@ -98,95 +51,103 @@ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
98
51
  id_perl_classes, id_word_boundary, id_one_line,
99
52
  id_unanchored, id_anchor_start, id_anchor_both, id_exception;
100
53
 
101
- void parse_re2_options(RE2::Options& re2_options, VALUE options) {
54
+ inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
55
+ if (encoding == RE2::Options::EncodingUTF8) {
56
+ return rb_utf8_str_new(str, length);
57
+ }
58
+
59
+ VALUE string = rb_str_new(str, length);
60
+ rb_enc_associate_index(string, rb_enc_find_index("ISO-8859-1"));
61
+
62
+ return string;
63
+ }
64
+
65
+ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
102
66
  if (TYPE(options) != T_HASH) {
103
67
  rb_raise(rb_eArgError, "options should be a hash");
104
68
  }
105
- VALUE utf8, posix_syntax, longest_match, log_errors,
106
- max_mem, literal, never_nl, case_sensitive, perl_classes,
107
- word_boundary, one_line;
108
69
 
109
- utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
70
+ VALUE utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
110
71
  if (!NIL_P(utf8)) {
111
- re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
72
+ re2_options->set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
112
73
  }
113
74
 
114
- posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
75
+ VALUE posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
115
76
  if (!NIL_P(posix_syntax)) {
116
- re2_options.set_posix_syntax(RTEST(posix_syntax));
77
+ re2_options->set_posix_syntax(RTEST(posix_syntax));
117
78
  }
118
79
 
119
- longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
80
+ VALUE longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
120
81
  if (!NIL_P(longest_match)) {
121
- re2_options.set_longest_match(RTEST(longest_match));
82
+ re2_options->set_longest_match(RTEST(longest_match));
122
83
  }
123
84
 
124
- log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
85
+ VALUE log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
125
86
  if (!NIL_P(log_errors)) {
126
- re2_options.set_log_errors(RTEST(log_errors));
87
+ re2_options->set_log_errors(RTEST(log_errors));
127
88
  }
128
89
 
129
- max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
90
+ VALUE max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
130
91
  if (!NIL_P(max_mem)) {
131
- re2_options.set_max_mem(NUM2INT(max_mem));
92
+ re2_options->set_max_mem(NUM2INT(max_mem));
132
93
  }
133
94
 
134
- literal = rb_hash_aref(options, ID2SYM(id_literal));
95
+ VALUE literal = rb_hash_aref(options, ID2SYM(id_literal));
135
96
  if (!NIL_P(literal)) {
136
- re2_options.set_literal(RTEST(literal));
97
+ re2_options->set_literal(RTEST(literal));
137
98
  }
138
99
 
139
- never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
100
+ VALUE never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
140
101
  if (!NIL_P(never_nl)) {
141
- re2_options.set_never_nl(RTEST(never_nl));
102
+ re2_options->set_never_nl(RTEST(never_nl));
142
103
  }
143
104
 
144
- case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
105
+ VALUE case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
145
106
  if (!NIL_P(case_sensitive)) {
146
- re2_options.set_case_sensitive(RTEST(case_sensitive));
107
+ re2_options->set_case_sensitive(RTEST(case_sensitive));
147
108
  }
148
109
 
149
- perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
110
+ VALUE perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
150
111
  if (!NIL_P(perl_classes)) {
151
- re2_options.set_perl_classes(RTEST(perl_classes));
112
+ re2_options->set_perl_classes(RTEST(perl_classes));
152
113
  }
153
114
 
154
- word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
115
+ VALUE word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
155
116
  if (!NIL_P(word_boundary)) {
156
- re2_options.set_word_boundary(RTEST(word_boundary));
117
+ re2_options->set_word_boundary(RTEST(word_boundary));
157
118
  }
158
119
 
159
- one_line = rb_hash_aref(options, ID2SYM(id_one_line));
120
+ VALUE one_line = rb_hash_aref(options, ID2SYM(id_one_line));
160
121
  if (!NIL_P(one_line)) {
161
- re2_options.set_one_line(RTEST(one_line));
122
+ re2_options->set_one_line(RTEST(one_line));
162
123
  }
163
124
  }
164
125
 
165
- void re2_matchdata_mark(re2_matchdata* self) {
126
+ static void re2_matchdata_mark(re2_matchdata* self) {
166
127
  rb_gc_mark(self->regexp);
167
128
  rb_gc_mark(self->text);
168
129
  }
169
130
 
170
- void re2_matchdata_free(re2_matchdata* self) {
131
+ static void re2_matchdata_free(re2_matchdata* self) {
171
132
  if (self->matches) {
172
133
  delete[] self->matches;
173
134
  }
174
135
  free(self);
175
136
  }
176
137
 
177
- void re2_scanner_mark(re2_scanner* self) {
138
+ static void re2_scanner_mark(re2_scanner* self) {
178
139
  rb_gc_mark(self->regexp);
179
140
  rb_gc_mark(self->text);
180
141
  }
181
142
 
182
- void re2_scanner_free(re2_scanner* self) {
143
+ static void re2_scanner_free(re2_scanner* self) {
183
144
  if (self->input) {
184
145
  delete self->input;
185
146
  }
186
147
  free(self);
187
148
  }
188
149
 
189
- void re2_regexp_free(re2_pattern* self) {
150
+ static void re2_regexp_free(re2_pattern* self) {
190
151
  if (self->pattern) {
191
152
  delete self->pattern;
192
153
  }
@@ -195,12 +156,14 @@ void re2_regexp_free(re2_pattern* self) {
195
156
 
196
157
  static VALUE re2_matchdata_allocate(VALUE klass) {
197
158
  re2_matchdata *m;
159
+
198
160
  return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
199
161
  re2_matchdata_free, m);
200
162
  }
201
163
 
202
164
  static VALUE re2_scanner_allocate(VALUE klass) {
203
165
  re2_scanner *c;
166
+
204
167
  return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
205
168
  re2_scanner_free, c);
206
169
  }
@@ -213,7 +176,7 @@ static VALUE re2_scanner_allocate(VALUE klass) {
213
176
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
214
177
  * m.string #=> "bob 123"
215
178
  */
216
- static VALUE re2_matchdata_string(VALUE self) {
179
+ static VALUE re2_matchdata_string(const VALUE self) {
217
180
  re2_matchdata *m;
218
181
  Data_Get_Struct(self, re2_matchdata, m);
219
182
 
@@ -228,7 +191,7 @@ static VALUE re2_matchdata_string(VALUE self) {
228
191
  * c = RE2::Regexp.new('(\d+)').scan("foo")
229
192
  * c.string #=> "foo"
230
193
  */
231
- static VALUE re2_scanner_string(VALUE self) {
194
+ static VALUE re2_scanner_string(const VALUE self) {
232
195
  re2_scanner *c;
233
196
  Data_Get_Struct(self, re2_scanner, c);
234
197
 
@@ -243,7 +206,7 @@ static VALUE re2_scanner_string(VALUE self) {
243
206
  * c = RE2::Regexp.new('(\d+)').scan("foo")
244
207
  * c.eof? #=> true
245
208
  */
246
- static VALUE re2_scanner_eof(VALUE self) {
209
+ static VALUE re2_scanner_eof(const VALUE self) {
247
210
  re2_scanner *c;
248
211
  Data_Get_Struct(self, re2_scanner, c);
249
212
 
@@ -265,7 +228,7 @@ static VALUE re2_scanner_rewind(VALUE self) {
265
228
  re2_scanner *c;
266
229
  Data_Get_Struct(self, re2_scanner, c);
267
230
 
268
- c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text));
231
+ c->input = new(std::nothrow) re2::StringPiece(StringValuePtr(c->text));
269
232
  c->eof = false;
270
233
 
271
234
  return self;
@@ -286,44 +249,41 @@ static VALUE re2_scanner_rewind(VALUE self) {
286
249
  * s.scan #=> ["bar"]
287
250
  */
288
251
  static VALUE re2_scanner_scan(VALUE self) {
289
- int i;
290
- size_t original_input_size, new_input_size;
291
- bool input_advanced;
292
252
  re2_pattern *p;
293
253
  re2_scanner *c;
294
- VALUE result;
295
254
 
296
255
  Data_Get_Struct(self, re2_scanner, c);
297
256
  Data_Get_Struct(c->regexp, re2_pattern, p);
298
257
 
299
- vector<RE2::Arg> argv(c->number_of_capturing_groups);
300
- vector<RE2::Arg*> args(c->number_of_capturing_groups);
301
- vector<string> matches(c->number_of_capturing_groups);
258
+ std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
259
+ std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
260
+ std::vector<std::string> matches(c->number_of_capturing_groups);
302
261
 
303
262
  if (c->eof) {
304
263
  return Qnil;
305
264
  }
306
265
 
307
- original_input_size = c->input->size();
266
+ re2::StringPiece::size_type original_input_size = c->input->size();
308
267
 
309
- for (i = 0; i < c->number_of_capturing_groups; i++) {
268
+ for (int i = 0; i < c->number_of_capturing_groups; ++i) {
310
269
  argv[i] = &matches[i];
311
270
  args[i] = &argv[i];
312
271
  }
313
272
 
314
273
  if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0],
315
274
  c->number_of_capturing_groups)) {
316
- result = rb_ary_new2(c->number_of_capturing_groups);
317
- new_input_size = c->input->size();
318
- input_advanced = new_input_size < original_input_size;
275
+ re2::StringPiece::size_type new_input_size = c->input->size();
276
+ bool input_advanced = new_input_size < original_input_size;
277
+
278
+ VALUE result = rb_ary_new2(c->number_of_capturing_groups);
319
279
 
320
- for (i = 0; i < c->number_of_capturing_groups; i++) {
280
+ for (int i = 0; i < c->number_of_capturing_groups; ++i) {
321
281
  if (matches[i].empty()) {
322
282
  rb_ary_push(result, Qnil);
323
283
  } else {
324
- rb_ary_push(result, ENCODED_STR_NEW(matches[i].data(),
284
+ rb_ary_push(result, encoded_str_new(matches[i].data(),
325
285
  matches[i].size(),
326
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
286
+ p->pattern->options().encoding()));
327
287
  }
328
288
  }
329
289
 
@@ -334,47 +294,41 @@ static VALUE re2_scanner_scan(VALUE self) {
334
294
  if (!input_advanced && new_input_size > 0) {
335
295
  c->input->remove_prefix(1);
336
296
  }
297
+
298
+ return result;
337
299
  } else {
338
- result = Qnil;
300
+ return Qnil;
339
301
  }
340
-
341
- return result;
342
302
  }
343
303
 
344
304
  /*
345
305
  * Retrieve a matchdata by index or name.
346
306
  */
347
- re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
348
- int id;
307
+ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
349
308
  re2_matchdata *m;
350
309
  re2_pattern *p;
351
- map<string, int> groups;
352
- string name;
353
- re2::StringPiece *match;
354
310
 
355
311
  Data_Get_Struct(self, re2_matchdata, m);
356
312
  Data_Get_Struct(m->regexp, re2_pattern, p);
357
313
 
314
+ int id;
315
+
358
316
  if (FIXNUM_P(idx)) {
359
317
  id = FIX2INT(idx);
360
318
  } else {
361
- if (SYMBOL_P(idx)) {
362
- name = rb_id2name(SYM2ID(idx));
363
- } else {
364
- name = StringValuePtr(idx);
365
- }
319
+ const char *name = SYMBOL_P(idx) ? rb_id2name(SYM2ID(idx)) : StringValuePtr(idx);
320
+ const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
321
+ std::map<std::string, int>::const_iterator search = groups.find(name);
366
322
 
367
- groups = p->pattern->NamedCapturingGroups();
368
-
369
- if (groups.count(name) == 1) {
370
- id = groups[name];
323
+ if (search != groups.end()) {
324
+ id = search->second;
371
325
  } else {
372
326
  return NULL;
373
327
  }
374
328
  }
375
329
 
376
330
  if (id >= 0 && id < m->number_of_matches) {
377
- match = &m->matches[id];
331
+ re2::StringPiece *match = &m->matches[id];
378
332
 
379
333
  if (!match->empty()) {
380
334
  return match;
@@ -393,7 +347,7 @@ re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
393
347
  * m.size #=> 2
394
348
  * m.length #=> 2
395
349
  */
396
- static VALUE re2_matchdata_size(VALUE self) {
350
+ static VALUE re2_matchdata_size(const VALUE self) {
397
351
  re2_matchdata *m;
398
352
  Data_Get_Struct(self, re2_matchdata, m);
399
353
 
@@ -410,23 +364,18 @@ static VALUE re2_matchdata_size(VALUE self) {
410
364
  * m.begin(0) #=> 1
411
365
  * m.begin(1) #=> 4
412
366
  */
413
- static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
367
+ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
414
368
  re2_matchdata *m;
415
- re2_pattern *p;
416
- re2::StringPiece *match;
417
- long offset;
418
369
 
419
370
  Data_Get_Struct(self, re2_matchdata, m);
420
- Data_Get_Struct(m->regexp, re2_pattern, p);
421
371
 
422
- match = re2_matchdata_find_match(n, self);
372
+ re2::StringPiece *match = re2_matchdata_find_match(n, self);
423
373
  if (match == NULL) {
424
374
  return Qnil;
425
375
  } else {
426
- offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text));
376
+ long offset = match->data() - StringValuePtr(m->text);
427
377
 
428
- return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
429
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
378
+ return LONG2NUM(rb_str_sublen(StringValue(m->text), offset));
430
379
  }
431
380
  }
432
381
 
@@ -440,24 +389,18 @@ static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
440
389
  * m.end(0) #=> 9
441
390
  * m.end(1) #=> 7
442
391
  */
443
- static VALUE re2_matchdata_end(VALUE self, VALUE n) {
392
+ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
444
393
  re2_matchdata *m;
445
- re2_pattern *p;
446
- re2::StringPiece *match;
447
- long offset;
448
394
 
449
395
  Data_Get_Struct(self, re2_matchdata, m);
450
- Data_Get_Struct(m->regexp, re2_pattern, p);
451
-
452
- match = re2_matchdata_find_match(n, self);
453
396
 
397
+ re2::StringPiece *match = re2_matchdata_find_match(n, self);
454
398
  if (match == NULL) {
455
399
  return Qnil;
456
400
  } else {
457
- offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text)) + match->size();
401
+ long offset = (match->data() - StringValuePtr(m->text)) + match->size();
458
402
 
459
- return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
460
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
403
+ return LONG2NUM(rb_str_sublen(StringValue(m->text), offset));
461
404
  }
462
405
  }
463
406
 
@@ -469,9 +412,10 @@ static VALUE re2_matchdata_end(VALUE self, VALUE n) {
469
412
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
470
413
  * m.regexp #=> #<RE2::Regexp /(\d+)/>
471
414
  */
472
- static VALUE re2_matchdata_regexp(VALUE self) {
415
+ static VALUE re2_matchdata_regexp(const VALUE self) {
473
416
  re2_matchdata *m;
474
417
  Data_Get_Struct(self, re2_matchdata, m);
418
+
475
419
  return m->regexp;
476
420
  }
477
421
 
@@ -483,7 +427,7 @@ static VALUE re2_matchdata_regexp(VALUE self) {
483
427
  * c = RE2::Regexp.new('(\d+)').scan("bob 123")
484
428
  * c.regexp #=> #<RE2::Regexp /(\d+)/>
485
429
  */
486
- static VALUE re2_scanner_regexp(VALUE self) {
430
+ static VALUE re2_scanner_regexp(const VALUE self) {
487
431
  re2_scanner *c;
488
432
  Data_Get_Struct(self, re2_scanner, c);
489
433
 
@@ -492,6 +436,7 @@ static VALUE re2_scanner_regexp(VALUE self) {
492
436
 
493
437
  static VALUE re2_regexp_allocate(VALUE klass) {
494
438
  re2_pattern *p;
439
+
495
440
  return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
496
441
  }
497
442
 
@@ -507,35 +452,31 @@ static VALUE re2_regexp_allocate(VALUE klass) {
507
452
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
508
453
  * m.to_a #=> ["123", "123"]
509
454
  */
510
- static VALUE re2_matchdata_to_a(VALUE self) {
511
- int i;
455
+ static VALUE re2_matchdata_to_a(const VALUE self) {
512
456
  re2_matchdata *m;
513
457
  re2_pattern *p;
514
- re2::StringPiece *match;
515
- VALUE array;
516
458
 
517
459
  Data_Get_Struct(self, re2_matchdata, m);
518
460
  Data_Get_Struct(m->regexp, re2_pattern, p);
519
461
 
520
- array = rb_ary_new2(m->number_of_matches);
521
- for (i = 0; i < m->number_of_matches; i++) {
522
- match = &m->matches[i];
462
+ VALUE array = rb_ary_new2(m->number_of_matches);
463
+ for (int i = 0; i < m->number_of_matches; ++i) {
464
+ re2::StringPiece *match = &m->matches[i];
523
465
 
524
466
  if (match->empty()) {
525
467
  rb_ary_push(array, Qnil);
526
468
  } else {
527
- rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
528
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
469
+ rb_ary_push(array, encoded_str_new(match->data(), match->size(),
470
+ p->pattern->options().encoding()));
529
471
  }
530
472
  }
531
473
 
532
474
  return array;
533
475
  }
534
476
 
535
- static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
477
+ static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
536
478
  re2_matchdata *m;
537
479
  re2_pattern *p;
538
- re2::StringPiece *match;
539
480
 
540
481
  Data_Get_Struct(self, re2_matchdata, m);
541
482
  Data_Get_Struct(m->regexp, re2_pattern, p);
@@ -543,32 +484,29 @@ static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
543
484
  if (nth < 0 || nth >= m->number_of_matches) {
544
485
  return Qnil;
545
486
  } else {
546
- match = &m->matches[nth];
487
+ re2::StringPiece *match = &m->matches[nth];
547
488
 
548
489
  if (match->empty()) {
549
490
  return Qnil;
550
491
  } else {
551
- return ENCODED_STR_NEW(match->data(), match->size(),
552
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
492
+ return encoded_str_new(match->data(), match->size(),
493
+ p->pattern->options().encoding());
553
494
  }
554
495
  }
555
496
  }
556
497
 
557
- static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
558
- int idx;
498
+ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
559
499
  re2_matchdata *m;
560
500
  re2_pattern *p;
561
- map<string, int> groups;
562
- string name_as_string(name);
563
501
 
564
502
  Data_Get_Struct(self, re2_matchdata, m);
565
503
  Data_Get_Struct(m->regexp, re2_pattern, p);
566
504
 
567
- groups = p->pattern->NamedCapturingGroups();
505
+ const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
506
+ std::map<std::string, int>::const_iterator search = groups.find(name);
568
507
 
569
- if (groups.count(name_as_string) == 1) {
570
- idx = groups[name_as_string];
571
- return re2_matchdata_nth_match(idx, self);
508
+ if (search != groups.end()) {
509
+ return re2_matchdata_nth_match(search->second, self);
572
510
  } else {
573
511
  return Qnil;
574
512
  }
@@ -621,7 +559,7 @@ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
621
559
  * m["number"] #=> "123"
622
560
  * m[:number] #=> "123"
623
561
  */
624
- static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
562
+ static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
625
563
  VALUE idx, rest;
626
564
  rb_scan_args(argc, argv, "11", &idx, &rest);
627
565
 
@@ -641,7 +579,7 @@ static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
641
579
  *
642
580
  * @return [String] the entire matched string
643
581
  */
644
- static VALUE re2_matchdata_to_s(VALUE self) {
582
+ static VALUE re2_matchdata_to_s(const VALUE self) {
645
583
  return re2_matchdata_nth_match(0, self);
646
584
  }
647
585
 
@@ -657,26 +595,24 @@ static VALUE re2_matchdata_to_s(VALUE self) {
657
595
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
658
596
  * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
659
597
  */
660
- static VALUE re2_matchdata_inspect(VALUE self) {
661
- int i;
598
+ static VALUE re2_matchdata_inspect(const VALUE self) {
662
599
  re2_matchdata *m;
663
600
  re2_pattern *p;
664
- VALUE match, result;
665
- ostringstream output;
666
601
 
667
602
  Data_Get_Struct(self, re2_matchdata, m);
668
603
  Data_Get_Struct(m->regexp, re2_pattern, p);
669
604
 
605
+ std::ostringstream output;
670
606
  output << "#<RE2::MatchData";
671
607
 
672
- for (i = 0; i < m->number_of_matches; i++) {
608
+ for (int i = 0; i < m->number_of_matches; ++i) {
673
609
  output << " ";
674
610
 
675
611
  if (i > 0) {
676
612
  output << i << ":";
677
613
  }
678
614
 
679
- match = re2_matchdata_nth_match(i, self);
615
+ VALUE match = re2_matchdata_nth_match(i, self);
680
616
 
681
617
  if (match == Qnil) {
682
618
  output << "nil";
@@ -687,10 +623,8 @@ static VALUE re2_matchdata_inspect(VALUE self) {
687
623
 
688
624
  output << ">";
689
625
 
690
- result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
691
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
692
-
693
- return result;
626
+ return encoded_str_new(output.str().data(), output.str().length(),
627
+ p->pattern->options().encoding());
694
628
  }
695
629
 
696
630
  /*
@@ -713,25 +647,22 @@ static VALUE re2_matchdata_inspect(VALUE self) {
713
647
  * puts "Unrecognised match"
714
648
  * end
715
649
  */
716
- static VALUE re2_matchdata_deconstruct(VALUE self) {
717
- int i;
650
+ static VALUE re2_matchdata_deconstruct(const VALUE self) {
718
651
  re2_matchdata *m;
719
652
  re2_pattern *p;
720
- re2::StringPiece *match;
721
- VALUE array;
722
653
 
723
654
  Data_Get_Struct(self, re2_matchdata, m);
724
655
  Data_Get_Struct(m->regexp, re2_pattern, p);
725
656
 
726
- array = rb_ary_new2(m->number_of_matches - 1);
727
- for (i = 1; i < m->number_of_matches; i++) {
728
- match = &m->matches[i];
657
+ VALUE array = rb_ary_new2(m->number_of_matches - 1);
658
+ for (int i = 1; i < m->number_of_matches; ++i) {
659
+ re2::StringPiece *match = &m->matches[i];
729
660
 
730
661
  if (match->empty()) {
731
662
  rb_ary_push(array, Qnil);
732
663
  } else {
733
- rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
734
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
664
+ rb_ary_push(array, encoded_str_new(match->data(), match->size(),
665
+ p->pattern->options().encoding()));
735
666
  }
736
667
  }
737
668
 
@@ -766,40 +697,37 @@ static VALUE re2_matchdata_deconstruct(VALUE self) {
766
697
  * puts "Unrecognised match"
767
698
  * end
768
699
  */
769
- static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
770
- int i;
771
- VALUE capturing_groups, key;
700
+ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys) {
772
701
  re2_matchdata *m;
773
702
  re2_pattern *p;
774
- map<string, int> groups;
775
- map<string, int>::iterator iterator;
776
703
 
777
704
  Data_Get_Struct(self, re2_matchdata, m);
778
705
  Data_Get_Struct(m->regexp, re2_pattern, p);
779
706
 
780
- groups = p->pattern->NamedCapturingGroups();
781
- capturing_groups = rb_hash_new();
707
+ const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
708
+ VALUE capturing_groups = rb_hash_new();
782
709
 
783
710
  if (NIL_P(keys)) {
784
- for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
711
+ for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
785
712
  rb_hash_aset(capturing_groups,
786
- ID2SYM(rb_intern(iterator->first.data())),
787
- re2_matchdata_nth_match(iterator->second, self));
713
+ ID2SYM(rb_intern(it->first.data())),
714
+ re2_matchdata_nth_match(it->second, self));
788
715
  }
789
716
  } else {
790
717
  Check_Type(keys, T_ARRAY);
791
718
 
792
719
  if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
793
- for (i = 0; i < RARRAY_LEN(keys); i++) {
794
- key = rb_ary_entry(keys, i);
720
+ for (int i = 0; i < RARRAY_LEN(keys); ++i) {
721
+ VALUE key = rb_ary_entry(keys, i);
795
722
  Check_Type(key, T_SYMBOL);
796
- string name(rb_id2name(SYM2ID(key)));
723
+ const char *name = rb_id2name(SYM2ID(key));
724
+ std::map<std::string, int>::const_iterator search = groups.find(name);
797
725
 
798
- if (groups.count(name) == 0) {
726
+ if (search != groups.end()) {
727
+ rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(search->second, self));
728
+ } else {
799
729
  break;
800
730
  }
801
-
802
- rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(groups[name], self));
803
731
  }
804
732
  }
805
733
  }
@@ -816,6 +744,7 @@ static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
816
744
  */
817
745
  static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
818
746
  UNUSED(self);
747
+
819
748
  return rb_class_new_instance(argc, argv, re2_cRegexp);
820
749
  }
821
750
 
@@ -863,11 +792,11 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
863
792
 
864
793
  if (RTEST(options)) {
865
794
  RE2::Options re2_options;
866
- parse_re2_options(re2_options, options);
795
+ parse_re2_options(&re2_options, options);
867
796
 
868
- p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
797
+ p->pattern = new(std::nothrow) RE2(StringValuePtr(pattern), re2_options);
869
798
  } else {
870
- p->pattern = new(nothrow) RE2(StringValuePtr(pattern));
799
+ p->pattern = new(std::nothrow) RE2(StringValuePtr(pattern));
871
800
  }
872
801
 
873
802
  if (p->pattern == 0) {
@@ -889,19 +818,17 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
889
818
  * re2 = RE2::Regexp.new("woo?")
890
819
  * re2.inspect #=> "#<RE2::Regexp /woo?/>"
891
820
  */
892
- static VALUE re2_regexp_inspect(VALUE self) {
821
+ static VALUE re2_regexp_inspect(const VALUE self) {
893
822
  re2_pattern *p;
894
- VALUE result;
895
- ostringstream output;
896
823
 
897
824
  Data_Get_Struct(self, re2_pattern, p);
898
825
 
899
- output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
826
+ std::ostringstream output;
900
827
 
901
- result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
902
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
828
+ output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
903
829
 
904
- return result;
830
+ return encoded_str_new(output.str().data(), output.str().length(),
831
+ p->pattern->options().encoding());
905
832
  }
906
833
 
907
834
  /*
@@ -916,12 +843,13 @@ static VALUE re2_regexp_inspect(VALUE self) {
916
843
  * re2 = RE2::Regexp.new("woo?")
917
844
  * re2.to_s #=> "woo?"
918
845
  */
919
- static VALUE re2_regexp_to_s(VALUE self) {
846
+ static VALUE re2_regexp_to_s(const VALUE self) {
920
847
  re2_pattern *p;
921
848
  Data_Get_Struct(self, re2_pattern, p);
922
- return ENCODED_STR_NEW(p->pattern->pattern().data(),
849
+
850
+ return encoded_str_new(p->pattern->pattern().data(),
923
851
  p->pattern->pattern().size(),
924
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
852
+ p->pattern->options().encoding());
925
853
  }
926
854
 
927
855
  /*
@@ -933,9 +861,10 @@ static VALUE re2_regexp_to_s(VALUE self) {
933
861
  * re2 = RE2::Regexp.new("woo?")
934
862
  * re2.ok? #=> true
935
863
  */
936
- static VALUE re2_regexp_ok(VALUE self) {
864
+ static VALUE re2_regexp_ok(const VALUE self) {
937
865
  re2_pattern *p;
938
866
  Data_Get_Struct(self, re2_pattern, p);
867
+
939
868
  return BOOL2RUBY(p->pattern->ok());
940
869
  }
941
870
 
@@ -948,9 +877,10 @@ static VALUE re2_regexp_ok(VALUE self) {
948
877
  * re2 = RE2::Regexp.new("woo?", :utf8 => true)
949
878
  * re2.utf8? #=> true
950
879
  */
951
- static VALUE re2_regexp_utf8(VALUE self) {
880
+ static VALUE re2_regexp_utf8(const VALUE self) {
952
881
  re2_pattern *p;
953
882
  Data_Get_Struct(self, re2_pattern, p);
883
+
954
884
  return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
955
885
  }
956
886
 
@@ -963,9 +893,10 @@ static VALUE re2_regexp_utf8(VALUE self) {
963
893
  * re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
964
894
  * re2.posix_syntax? #=> true
965
895
  */
966
- static VALUE re2_regexp_posix_syntax(VALUE self) {
896
+ static VALUE re2_regexp_posix_syntax(const VALUE self) {
967
897
  re2_pattern *p;
968
898
  Data_Get_Struct(self, re2_pattern, p);
899
+
969
900
  return BOOL2RUBY(p->pattern->options().posix_syntax());
970
901
  }
971
902
 
@@ -978,9 +909,10 @@ static VALUE re2_regexp_posix_syntax(VALUE self) {
978
909
  * re2 = RE2::Regexp.new("woo?", :longest_match => true)
979
910
  * re2.longest_match? #=> true
980
911
  */
981
- static VALUE re2_regexp_longest_match(VALUE self) {
912
+ static VALUE re2_regexp_longest_match(const VALUE self) {
982
913
  re2_pattern *p;
983
914
  Data_Get_Struct(self, re2_pattern, p);
915
+
984
916
  return BOOL2RUBY(p->pattern->options().longest_match());
985
917
  }
986
918
 
@@ -993,9 +925,10 @@ static VALUE re2_regexp_longest_match(VALUE self) {
993
925
  * re2 = RE2::Regexp.new("woo?", :log_errors => true)
994
926
  * re2.log_errors? #=> true
995
927
  */
996
- static VALUE re2_regexp_log_errors(VALUE self) {
928
+ static VALUE re2_regexp_log_errors(const VALUE self) {
997
929
  re2_pattern *p;
998
930
  Data_Get_Struct(self, re2_pattern, p);
931
+
999
932
  return BOOL2RUBY(p->pattern->options().log_errors());
1000
933
  }
1001
934
 
@@ -1008,9 +941,10 @@ static VALUE re2_regexp_log_errors(VALUE self) {
1008
941
  * re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
1009
942
  * re2.max_mem #=> 1024
1010
943
  */
1011
- static VALUE re2_regexp_max_mem(VALUE self) {
944
+ static VALUE re2_regexp_max_mem(const VALUE self) {
1012
945
  re2_pattern *p;
1013
946
  Data_Get_Struct(self, re2_pattern, p);
947
+
1014
948
  return INT2FIX(p->pattern->options().max_mem());
1015
949
  }
1016
950
 
@@ -1023,9 +957,10 @@ static VALUE re2_regexp_max_mem(VALUE self) {
1023
957
  * re2 = RE2::Regexp.new("woo?", :literal => true)
1024
958
  * re2.literal? #=> true
1025
959
  */
1026
- static VALUE re2_regexp_literal(VALUE self) {
960
+ static VALUE re2_regexp_literal(const VALUE self) {
1027
961
  re2_pattern *p;
1028
962
  Data_Get_Struct(self, re2_pattern, p);
963
+
1029
964
  return BOOL2RUBY(p->pattern->options().literal());
1030
965
  }
1031
966
 
@@ -1038,9 +973,10 @@ static VALUE re2_regexp_literal(VALUE self) {
1038
973
  * re2 = RE2::Regexp.new("woo?", :never_nl => true)
1039
974
  * re2.never_nl? #=> true
1040
975
  */
1041
- static VALUE re2_regexp_never_nl(VALUE self) {
976
+ static VALUE re2_regexp_never_nl(const VALUE self) {
1042
977
  re2_pattern *p;
1043
978
  Data_Get_Struct(self, re2_pattern, p);
979
+
1044
980
  return BOOL2RUBY(p->pattern->options().never_nl());
1045
981
  }
1046
982
 
@@ -1053,9 +989,10 @@ static VALUE re2_regexp_never_nl(VALUE self) {
1053
989
  * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
1054
990
  * re2.case_sensitive? #=> true
1055
991
  */
1056
- static VALUE re2_regexp_case_sensitive(VALUE self) {
992
+ static VALUE re2_regexp_case_sensitive(const VALUE self) {
1057
993
  re2_pattern *p;
1058
994
  Data_Get_Struct(self, re2_pattern, p);
995
+
1059
996
  return BOOL2RUBY(p->pattern->options().case_sensitive());
1060
997
  }
1061
998
 
@@ -1069,7 +1006,7 @@ static VALUE re2_regexp_case_sensitive(VALUE self) {
1069
1006
  * re2.case_insensitive? #=> false
1070
1007
  * re2.casefold? #=> false
1071
1008
  */
1072
- static VALUE re2_regexp_case_insensitive(VALUE self) {
1009
+ static VALUE re2_regexp_case_insensitive(const VALUE self) {
1073
1010
  return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
1074
1011
  }
1075
1012
 
@@ -1082,9 +1019,10 @@ static VALUE re2_regexp_case_insensitive(VALUE self) {
1082
1019
  * re2 = RE2::Regexp.new("woo?", :perl_classes => true)
1083
1020
  * re2.perl_classes? #=> true
1084
1021
  */
1085
- static VALUE re2_regexp_perl_classes(VALUE self) {
1022
+ static VALUE re2_regexp_perl_classes(const VALUE self) {
1086
1023
  re2_pattern *p;
1087
1024
  Data_Get_Struct(self, re2_pattern, p);
1025
+
1088
1026
  return BOOL2RUBY(p->pattern->options().perl_classes());
1089
1027
  }
1090
1028
 
@@ -1097,9 +1035,10 @@ static VALUE re2_regexp_perl_classes(VALUE self) {
1097
1035
  * re2 = RE2::Regexp.new("woo?", :word_boundary => true)
1098
1036
  * re2.word_boundary? #=> true
1099
1037
  */
1100
- static VALUE re2_regexp_word_boundary(VALUE self) {
1038
+ static VALUE re2_regexp_word_boundary(const VALUE self) {
1101
1039
  re2_pattern *p;
1102
1040
  Data_Get_Struct(self, re2_pattern, p);
1041
+
1103
1042
  return BOOL2RUBY(p->pattern->options().word_boundary());
1104
1043
  }
1105
1044
 
@@ -1112,9 +1051,10 @@ static VALUE re2_regexp_word_boundary(VALUE self) {
1112
1051
  * re2 = RE2::Regexp.new("woo?", :one_line => true)
1113
1052
  * re2.one_line? #=> true
1114
1053
  */
1115
- static VALUE re2_regexp_one_line(VALUE self) {
1054
+ static VALUE re2_regexp_one_line(const VALUE self) {
1116
1055
  re2_pattern *p;
1117
1056
  Data_Get_Struct(self, re2_pattern, p);
1057
+
1118
1058
  return BOOL2RUBY(p->pattern->options().one_line());
1119
1059
  }
1120
1060
 
@@ -1124,9 +1064,10 @@ static VALUE re2_regexp_one_line(VALUE self) {
1124
1064
  *
1125
1065
  * @return [String, nil] the error string or nil
1126
1066
  */
1127
- static VALUE re2_regexp_error(VALUE self) {
1067
+ static VALUE re2_regexp_error(const VALUE self) {
1128
1068
  re2_pattern *p;
1129
1069
  Data_Get_Struct(self, re2_pattern, p);
1070
+
1130
1071
  if (p->pattern->ok()) {
1131
1072
  return Qnil;
1132
1073
  } else {
@@ -1144,15 +1085,16 @@ static VALUE re2_regexp_error(VALUE self) {
1144
1085
  *
1145
1086
  * @return [String, nil] the offending portion of the regexp or nil
1146
1087
  */
1147
- static VALUE re2_regexp_error_arg(VALUE self) {
1088
+ static VALUE re2_regexp_error_arg(const VALUE self) {
1148
1089
  re2_pattern *p;
1149
1090
  Data_Get_Struct(self, re2_pattern, p);
1091
+
1150
1092
  if (p->pattern->ok()) {
1151
1093
  return Qnil;
1152
1094
  } else {
1153
- return ENCODED_STR_NEW(p->pattern->error_arg().data(),
1095
+ return encoded_str_new(p->pattern->error_arg().data(),
1154
1096
  p->pattern->error_arg().size(),
1155
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1097
+ p->pattern->options().encoding());
1156
1098
  }
1157
1099
  }
1158
1100
 
@@ -1163,9 +1105,10 @@ static VALUE re2_regexp_error_arg(VALUE self) {
1163
1105
  *
1164
1106
  * @return [Integer] the regexp "cost"
1165
1107
  */
1166
- static VALUE re2_regexp_program_size(VALUE self) {
1108
+ static VALUE re2_regexp_program_size(const VALUE self) {
1167
1109
  re2_pattern *p;
1168
1110
  Data_Get_Struct(self, re2_pattern, p);
1111
+
1169
1112
  return INT2FIX(p->pattern->ProgramSize());
1170
1113
  }
1171
1114
 
@@ -1175,12 +1118,11 @@ static VALUE re2_regexp_program_size(VALUE self) {
1175
1118
  *
1176
1119
  * @return [Hash] the options
1177
1120
  */
1178
- static VALUE re2_regexp_options(VALUE self) {
1179
- VALUE options;
1121
+ static VALUE re2_regexp_options(const VALUE self) {
1180
1122
  re2_pattern *p;
1181
1123
 
1182
1124
  Data_Get_Struct(self, re2_pattern, p);
1183
- options = rb_hash_new();
1125
+ VALUE options = rb_hash_new();
1184
1126
 
1185
1127
  rb_hash_aset(options, ID2SYM(id_utf8),
1186
1128
  BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8));
@@ -1228,10 +1170,10 @@ static VALUE re2_regexp_options(VALUE self) {
1228
1170
  *
1229
1171
  * @return [Integer] the number of capturing subpatterns
1230
1172
  */
1231
- static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
1173
+ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
1232
1174
  re2_pattern *p;
1233
-
1234
1175
  Data_Get_Struct(self, re2_pattern, p);
1176
+
1235
1177
  return INT2FIX(p->pattern->NumberOfCapturingGroups());
1236
1178
  }
1237
1179
 
@@ -1244,21 +1186,18 @@ static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
1244
1186
  *
1245
1187
  * @return [Hash] a hash of names to capturing indices
1246
1188
  */
1247
- static VALUE re2_regexp_named_capturing_groups(VALUE self) {
1248
- VALUE capturing_groups;
1189
+ static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
1249
1190
  re2_pattern *p;
1250
- map<string, int> groups;
1251
- map<string, int>::iterator iterator;
1252
1191
 
1253
1192
  Data_Get_Struct(self, re2_pattern, p);
1254
- groups = p->pattern->NamedCapturingGroups();
1255
- capturing_groups = rb_hash_new();
1193
+ const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
1194
+ VALUE capturing_groups = rb_hash_new();
1256
1195
 
1257
- for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
1196
+ for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
1258
1197
  rb_hash_aset(capturing_groups,
1259
- ENCODED_STR_NEW(iterator->first.data(), iterator->first.size(),
1260
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"),
1261
- INT2FIX(iterator->second));
1198
+ encoded_str_new(it->first.data(), it->first.size(),
1199
+ p->pattern->options().encoding()),
1200
+ INT2FIX(it->second));
1262
1201
  }
1263
1202
 
1264
1203
  return capturing_groups;
@@ -1316,20 +1255,20 @@ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
1316
1255
  * r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
1317
1256
  * r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
1318
1257
  */
1319
- static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1320
- int n;
1321
- bool matched;
1258
+ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1322
1259
  re2_pattern *p;
1323
1260
  re2_matchdata *m;
1324
- VALUE text, number_of_matches, matchdata;
1261
+ VALUE text, number_of_matches;
1325
1262
 
1326
1263
  rb_scan_args(argc, argv, "11", &text, &number_of_matches);
1327
1264
 
1328
1265
  /* Ensure text is a string. */
1329
- text = StringValue(text);
1266
+ StringValue(text);
1330
1267
 
1331
1268
  Data_Get_Struct(self, re2_pattern, p);
1332
1269
 
1270
+ int n;
1271
+
1333
1272
  if (RTEST(number_of_matches)) {
1334
1273
  n = NUM2INT(number_of_matches);
1335
1274
 
@@ -1345,17 +1284,21 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1345
1284
  }
1346
1285
 
1347
1286
  if (n == 0) {
1348
- matched = match(p->pattern, StringValuePtr(text), 0,
1349
- static_cast<int>(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0);
1287
+ #ifdef HAVE_ENDPOS_ARGUMENT
1288
+ bool matched = p->pattern->Match(StringValuePtr(text), 0,
1289
+ RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
1290
+ #else
1291
+ bool matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED,
1292
+ 0, 0);
1293
+ #endif
1350
1294
  return BOOL2RUBY(matched);
1351
1295
  } else {
1352
-
1353
1296
  /* Because match returns the whole match as well. */
1354
1297
  n += 1;
1355
1298
 
1356
- matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1299
+ VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1357
1300
  Data_Get_Struct(matchdata, re2_matchdata, m);
1358
- m->matches = new(nothrow) re2::StringPiece[n];
1301
+ m->matches = new(std::nothrow) re2::StringPiece[n];
1359
1302
  m->regexp = self;
1360
1303
  m->text = rb_str_dup(text);
1361
1304
  rb_str_freeze(m->text);
@@ -1367,10 +1310,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1367
1310
 
1368
1311
  m->number_of_matches = n;
1369
1312
 
1370
- matched = match(p->pattern, StringValuePtr(m->text), 0,
1371
- static_cast<int>(RSTRING_LEN(m->text)),
1372
- RE2::UNANCHORED, m->matches, n);
1373
-
1313
+ #ifdef HAVE_ENDPOS_ARGUMENT
1314
+ bool matched = p->pattern->Match(StringValuePtr(m->text), 0,
1315
+ RSTRING_LEN(m->text), RE2::UNANCHORED, m->matches, n);
1316
+ #else
1317
+ bool matched = p->pattern->Match(StringValuePtr(m->text), 0,
1318
+ RE2::UNANCHORED, m->matches, n);
1319
+ #endif
1374
1320
  if (matched) {
1375
1321
  return matchdata;
1376
1322
  } else {
@@ -1385,10 +1331,8 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1385
1331
  *
1386
1332
  * @return [Boolean] whether the match was successful
1387
1333
  */
1388
- static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
1389
- VALUE argv[2];
1390
- argv[0] = text;
1391
- argv[1] = INT2FIX(0);
1334
+ static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
1335
+ VALUE argv[2] = { text, INT2FIX(0) };
1392
1336
 
1393
1337
  return re2_regexp_match(2, argv, self);
1394
1338
  }
@@ -1399,16 +1343,15 @@ static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
1399
1343
  * @example
1400
1344
  * c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
1401
1345
  */
1402
- static VALUE re2_regexp_scan(VALUE self, VALUE text) {
1346
+ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
1403
1347
  re2_pattern *p;
1404
1348
  re2_scanner *c;
1405
- VALUE scanner;
1406
1349
 
1407
1350
  Data_Get_Struct(self, re2_pattern, p);
1408
- scanner = rb_class_new_instance(0, 0, re2_cScanner);
1351
+ VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
1409
1352
  Data_Get_Struct(scanner, re2_scanner, c);
1410
1353
 
1411
- c->input = new(nothrow) re2::StringPiece(StringValuePtr(text));
1354
+ c->input = new(std::nothrow) re2::StringPiece(StringValuePtr(text));
1412
1355
  c->regexp = self;
1413
1356
  c->text = text;
1414
1357
 
@@ -1448,23 +1391,21 @@ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
1448
1391
  /* Take a copy of str so it can be modified in-place by
1449
1392
  * RE2::Replace.
1450
1393
  */
1451
- string str_as_string(StringValuePtr(str));
1394
+ std::string str_as_string(StringValuePtr(str));
1452
1395
 
1453
1396
  /* Do the replacement. */
1454
1397
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1455
1398
  Data_Get_Struct(pattern, re2_pattern, p);
1456
1399
  RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1457
1400
 
1458
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1459
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1401
+ return encoded_str_new(str_as_string.data(), str_as_string.size(),
1402
+ p->pattern->options().encoding());
1460
1403
  } else {
1461
1404
  RE2::Replace(&str_as_string, StringValuePtr(pattern),
1462
1405
  StringValuePtr(rewrite));
1463
1406
 
1464
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1465
- "UTF-8");
1407
+ return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
1466
1408
  }
1467
-
1468
1409
  }
1469
1410
 
1470
1411
  /*
@@ -1491,21 +1432,20 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
1491
1432
  * RE2::GlobalReplace.
1492
1433
  */
1493
1434
  re2_pattern *p;
1494
- string str_as_string(StringValuePtr(str));
1435
+ std::string str_as_string(StringValuePtr(str));
1495
1436
 
1496
1437
  /* Do the replacement. */
1497
1438
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1498
1439
  Data_Get_Struct(pattern, re2_pattern, p);
1499
1440
  RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1500
1441
 
1501
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1502
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1442
+ return encoded_str_new(str_as_string.data(), str_as_string.size(),
1443
+ p->pattern->options().encoding());
1503
1444
  } else {
1504
1445
  RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
1505
1446
  StringValuePtr(rewrite));
1506
1447
 
1507
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1508
- "UTF-8");
1448
+ return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
1509
1449
  }
1510
1450
  }
1511
1451
 
@@ -1521,11 +1461,12 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
1521
1461
  */
1522
1462
  static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
1523
1463
  UNUSED(self);
1524
- string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
1464
+ std::string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
1465
+
1525
1466
  return rb_str_new(quoted_string.data(), quoted_string.size());
1526
1467
  }
1527
1468
 
1528
- void re2_set_free(re2_set *self) {
1469
+ static void re2_set_free(re2_set *self) {
1529
1470
  if (self->set) {
1530
1471
  delete self->set;
1531
1472
  }
@@ -1535,6 +1476,7 @@ void re2_set_free(re2_set *self) {
1535
1476
  static VALUE re2_set_allocate(VALUE klass) {
1536
1477
  re2_set *s;
1537
1478
  VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
1479
+
1538
1480
  return result;
1539
1481
  }
1540
1482
 
@@ -1582,18 +1524,16 @@ static VALUE re2_set_allocate(VALUE klass) {
1582
1524
  static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1583
1525
  VALUE anchor, options;
1584
1526
  re2_set *s;
1585
- RE2::Anchor re2_anchor;
1527
+ RE2::Anchor re2_anchor = RE2::UNANCHORED;
1586
1528
  RE2::Options re2_options;
1587
1529
 
1588
1530
  rb_scan_args(argc, argv, "02", &anchor, &options);
1589
1531
  Data_Get_Struct(self, re2_set, s);
1590
1532
 
1591
1533
  if (RTEST(options)) {
1592
- parse_re2_options(re2_options, options);
1534
+ parse_re2_options(&re2_options, options);
1593
1535
  }
1594
- if (NIL_P(anchor)) {
1595
- re2_anchor = RE2::UNANCHORED;
1596
- } else {
1536
+ if (!NIL_P(anchor)) {
1597
1537
  Check_Type(anchor, T_SYMBOL);
1598
1538
  ID id_anchor = SYM2ID(anchor);
1599
1539
  if (id_anchor == id_unanchored) {
@@ -1607,7 +1547,7 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1607
1547
  }
1608
1548
  }
1609
1549
 
1610
- s->set = new(nothrow) RE2::Set(re2_options, re2_anchor);
1550
+ s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
1611
1551
  if (s->set == 0) {
1612
1552
  rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
1613
1553
  }
@@ -1715,26 +1655,27 @@ static VALUE re2_set_match_raises_errors_p(VALUE self) {
1715
1655
  * set.compile
1716
1656
  * set.match("abcdef", :exception => true) # => [0, 1]
1717
1657
  */
1718
- static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1719
- VALUE str, options, exception_option;
1658
+ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
1659
+ VALUE str, options;
1720
1660
  bool raise_exception = true;
1721
1661
  rb_scan_args(argc, argv, "11", &str, &options);
1722
1662
 
1723
1663
  StringValue(str);
1724
1664
  re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
1725
- std::vector<int> v;
1726
1665
  re2_set *s;
1727
1666
  Data_Get_Struct(self, re2_set, s);
1728
1667
 
1729
1668
  if (RTEST(options)) {
1730
1669
  Check_Type(options, T_HASH);
1731
1670
 
1732
- exception_option = rb_hash_aref(options, ID2SYM(id_exception));
1671
+ VALUE exception_option = rb_hash_aref(options, ID2SYM(id_exception));
1733
1672
  if (!NIL_P(exception_option)) {
1734
1673
  raise_exception = RTEST(exception_option);
1735
1674
  }
1736
1675
  }
1737
1676
 
1677
+ std::vector<int> v;
1678
+
1738
1679
  if (raise_exception) {
1739
1680
  #ifdef HAVE_ERROR_INFO_ARGUMENT
1740
1681
  RE2::Set::ErrorInfo e;
@@ -1755,7 +1696,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1755
1696
  rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
1756
1697
  }
1757
1698
  } else {
1758
- for (size_t i = 0; i < v.size(); i++) {
1699
+ for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
1759
1700
  rb_ary_push(result, INT2FIX(v[i]));
1760
1701
  }
1761
1702
  }
@@ -1769,7 +1710,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1769
1710
  VALUE result = rb_ary_new2(v.size());
1770
1711
 
1771
1712
  if (matched) {
1772
- for (size_t i = 0; i < v.size(); i++) {
1713
+ for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
1773
1714
  rb_ary_push(result, INT2FIX(v[i]));
1774
1715
  }
1775
1716
  }
@@ -1919,7 +1860,7 @@ void Init_re2(void) {
1919
1860
  rb_define_singleton_method(re2_cRegexp, "compile",
1920
1861
  RUBY_METHOD_FUNC(rb_class_new_instance), -1);
1921
1862
 
1922
- rb_define_global_function("RE2", RUBY_METHOD_FUNC(re2_re2), -1);
1863
+ rb_define_module_function(rb_mKernel, "RE2", RUBY_METHOD_FUNC(re2_re2), -1);
1923
1864
 
1924
1865
  /* Create the symbols used in options. */
1925
1866
  id_utf8 = rb_intern("utf8");
data/lib/2.6/re2.bundle CHANGED
Binary file
data/lib/2.7/re2.bundle CHANGED
Binary file
data/lib/3.0/re2.bundle CHANGED
Binary file
data/lib/3.1/re2.bundle CHANGED
Binary file
data/lib/3.2/re2.bundle CHANGED
Binary file
data/lib/re2/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RE2
4
- VERSION = "2.1.0"
4
+ VERSION = "2.1.2"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0
4
+ version: 2.1.2
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Paul Mucur
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2023-09-16 00:00:00.000000000 Z
12
+ date: 2023-09-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake-compiler