re2 2.1.0-x86_64-linux → 2.1.1-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0b465c9de4002e0e2efc335f26da6e437b99d3e081eaada089121de492038942
4
- data.tar.gz: 58ed1dc3e69ab368ea01154432f6578b7bbf7bae738e6a8186d77345b95053e7
3
+ metadata.gz: 3c18c2ee7ddd5172ec4d8da5c0c62a3f11c73839e694a163d118fba2b0676eab
4
+ data.tar.gz: d416e5e2347245271ab28c3afaf53ec72a76f7dcee9b6129b97728c4253ad32f
5
5
  SHA512:
6
- metadata.gz: 28ea3304a137f0fec771f754ffaffcaec0cdaec7312929de86b80881807420402fdc7069059f665f3046365a76e88fc33fb187c479b2822ccb2bc5bdaebdf0de
7
- data.tar.gz: 598932fff4fccf3b4ba8edf514cbda562a4c3a9053960daac8730632515e51d0faf407f442a419b9f8337b4207a194c5ee0367bc3bc945488ef0e753b6404bb3
6
+ metadata.gz: a823a1005b12dc6c90af68e42820d8a594b363a76051160e024d9bbe6132c46cf7628e6a5ba073d99c9a93df8bf12e6ca110e5a5ecb56139cc621652354cf0b4
7
+ data.tar.gz: 59b57ee232b8752a6a565c92a701ccae97694542f99d0ff882a5cd29b870c780a77d898bdfbab5627507ef64e969f479846dd5bfa1ff956e1dc0939e88881e77
data/README.md CHANGED
@@ -5,7 +5,7 @@ Ruby bindings to [RE2][], a "fast, safe, thread-friendly alternative to
5
5
  backtracking regular expression engines like those used in PCRE, Perl, and
6
6
  Python".
7
7
 
8
- **Current version:** 2.1.0
8
+ **Current version:** 2.1.1
9
9
  **Supported Ruby versions:** 2.6, 2.7, 3.0, 3.1, 3.2
10
10
  **Bundled RE2 version:** libre2.11 (2023-09-01)
11
11
  **Supported RE2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01), libre2.10 (2022-12-01), libre2.11 (2023-07-01)
data/ext/re2/extconf.rb CHANGED
@@ -128,7 +128,6 @@ def build_extension(static_p = false)
128
128
 
129
129
  have_library("stdc++")
130
130
  have_header("stdint.h")
131
- have_func("rb_str_sublen")
132
131
 
133
132
  if !static_p and !have_library("re2")
134
133
  abort "You must have re2 installed and specified with --with-re2-dir, please see https://github.com/google/re2/wiki/Install"
data/ext/re2/re2.cc CHANGED
@@ -6,68 +6,21 @@
6
6
  * Released under the BSD Licence, please see LICENSE.txt
7
7
  */
8
8
 
9
- #include <ruby.h>
10
- #include <re2/re2.h>
11
- #include <re2/set.h>
12
9
  #include <stdint.h>
13
- #include <string>
10
+
11
+ #include <map>
14
12
  #include <sstream>
13
+ #include <string>
15
14
  #include <vector>
16
- using std::string;
17
- using std::ostringstream;
18
- using std::nothrow;
19
- using std::map;
20
- using std::vector;
15
+
16
+ #include <re2/re2.h>
17
+ #include <re2/set.h>
18
+ #include <ruby.h>
19
+ #include <ruby/encoding.h>
21
20
 
22
21
  #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
23
22
  #define UNUSED(x) ((void)x)
24
23
 
25
- #ifndef RSTRING_LEN
26
- #define RSTRING_LEN(x) (RSTRING(x)->len)
27
- #endif
28
-
29
- #ifndef RSTRING_PTR
30
- #define RSTRING_PTR(x) (RSTRING(x)->ptr)
31
- #endif
32
-
33
- #ifdef HAVE_RUBY_ENCODING_H
34
- #include <ruby/encoding.h>
35
- #define ENCODED_STR_NEW(str, length, encoding) \
36
- ({ \
37
- VALUE _string = rb_str_new(str, length); \
38
- int _enc = rb_enc_find_index(encoding); \
39
- rb_enc_associate_index(_string, _enc); \
40
- _string; \
41
- })
42
- #else
43
- #define ENCODED_STR_NEW(str, length, encoding) \
44
- rb_str_new((const char *)str, (long)length)
45
- #endif
46
-
47
- #ifdef HAVE_RB_STR_SUBLEN
48
- #define ENCODED_STR_SUBLEN(str, offset, encoding) \
49
- LONG2NUM(rb_str_sublen(str, offset))
50
- #else
51
- #ifdef HAVE_RUBY_ENCODING_H
52
- #define ENCODED_STR_SUBLEN(str, offset, encoding) \
53
- ({ \
54
- VALUE _string = ENCODED_STR_NEW(RSTRING_PTR(str), offset, encoding); \
55
- rb_str_length(_string); \
56
- })
57
- #else
58
- #define ENCODED_STR_SUBLEN(str, offset, encoding) \
59
- LONG2NUM(offset)
60
- #endif
61
- #endif
62
-
63
- #ifdef HAVE_ENDPOS_ARGUMENT
64
- #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
65
- (pattern->Match(text, startpos, endpos, anchor, match, nmatch))
66
- #else
67
- #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
68
- (pattern->Match(text, startpos, anchor, match, nmatch))
69
- #endif
70
-
71
24
  typedef struct {
72
25
  RE2 *pattern;
73
26
  } re2_pattern;
@@ -98,95 +51,103 @@ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
98
51
  id_perl_classes, id_word_boundary, id_one_line,
99
52
  id_unanchored, id_anchor_start, id_anchor_both, id_exception;
100
53
 
101
- void parse_re2_options(RE2::Options& re2_options, VALUE options) {
54
+ inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
55
+ if (encoding == RE2::Options::EncodingUTF8) {
56
+ return rb_utf8_str_new(str, length);
57
+ }
58
+
59
+ VALUE string = rb_str_new(str, length);
60
+ rb_enc_associate_index(string, rb_enc_find_index("ISO-8859-1"));
61
+
62
+ return string;
63
+ }
64
+
65
+ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
102
66
  if (TYPE(options) != T_HASH) {
103
67
  rb_raise(rb_eArgError, "options should be a hash");
104
68
  }
105
- VALUE utf8, posix_syntax, longest_match, log_errors,
106
- max_mem, literal, never_nl, case_sensitive, perl_classes,
107
- word_boundary, one_line;
108
69
 
109
- utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
70
+ VALUE utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
110
71
  if (!NIL_P(utf8)) {
111
- re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
72
+ re2_options->set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
112
73
  }
113
74
 
114
- posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
75
+ VALUE posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
115
76
  if (!NIL_P(posix_syntax)) {
116
- re2_options.set_posix_syntax(RTEST(posix_syntax));
77
+ re2_options->set_posix_syntax(RTEST(posix_syntax));
117
78
  }
118
79
 
119
- longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
80
+ VALUE longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
120
81
  if (!NIL_P(longest_match)) {
121
- re2_options.set_longest_match(RTEST(longest_match));
82
+ re2_options->set_longest_match(RTEST(longest_match));
122
83
  }
123
84
 
124
- log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
85
+ VALUE log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
125
86
  if (!NIL_P(log_errors)) {
126
- re2_options.set_log_errors(RTEST(log_errors));
87
+ re2_options->set_log_errors(RTEST(log_errors));
127
88
  }
128
89
 
129
- max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
90
+ VALUE max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
130
91
  if (!NIL_P(max_mem)) {
131
- re2_options.set_max_mem(NUM2INT(max_mem));
92
+ re2_options->set_max_mem(NUM2INT(max_mem));
132
93
  }
133
94
 
134
- literal = rb_hash_aref(options, ID2SYM(id_literal));
95
+ VALUE literal = rb_hash_aref(options, ID2SYM(id_literal));
135
96
  if (!NIL_P(literal)) {
136
- re2_options.set_literal(RTEST(literal));
97
+ re2_options->set_literal(RTEST(literal));
137
98
  }
138
99
 
139
- never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
100
+ VALUE never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
140
101
  if (!NIL_P(never_nl)) {
141
- re2_options.set_never_nl(RTEST(never_nl));
102
+ re2_options->set_never_nl(RTEST(never_nl));
142
103
  }
143
104
 
144
- case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
105
+ VALUE case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
145
106
  if (!NIL_P(case_sensitive)) {
146
- re2_options.set_case_sensitive(RTEST(case_sensitive));
107
+ re2_options->set_case_sensitive(RTEST(case_sensitive));
147
108
  }
148
109
 
149
- perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
110
+ VALUE perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
150
111
  if (!NIL_P(perl_classes)) {
151
- re2_options.set_perl_classes(RTEST(perl_classes));
112
+ re2_options->set_perl_classes(RTEST(perl_classes));
152
113
  }
153
114
 
154
- word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
115
+ VALUE word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
155
116
  if (!NIL_P(word_boundary)) {
156
- re2_options.set_word_boundary(RTEST(word_boundary));
117
+ re2_options->set_word_boundary(RTEST(word_boundary));
157
118
  }
158
119
 
159
- one_line = rb_hash_aref(options, ID2SYM(id_one_line));
120
+ VALUE one_line = rb_hash_aref(options, ID2SYM(id_one_line));
160
121
  if (!NIL_P(one_line)) {
161
- re2_options.set_one_line(RTEST(one_line));
122
+ re2_options->set_one_line(RTEST(one_line));
162
123
  }
163
124
  }
164
125
 
165
- void re2_matchdata_mark(re2_matchdata* self) {
126
+ static void re2_matchdata_mark(re2_matchdata* self) {
166
127
  rb_gc_mark(self->regexp);
167
128
  rb_gc_mark(self->text);
168
129
  }
169
130
 
170
- void re2_matchdata_free(re2_matchdata* self) {
131
+ static void re2_matchdata_free(re2_matchdata* self) {
171
132
  if (self->matches) {
172
133
  delete[] self->matches;
173
134
  }
174
135
  free(self);
175
136
  }
176
137
 
177
- void re2_scanner_mark(re2_scanner* self) {
138
+ static void re2_scanner_mark(re2_scanner* self) {
178
139
  rb_gc_mark(self->regexp);
179
140
  rb_gc_mark(self->text);
180
141
  }
181
142
 
182
- void re2_scanner_free(re2_scanner* self) {
143
+ static void re2_scanner_free(re2_scanner* self) {
183
144
  if (self->input) {
184
145
  delete self->input;
185
146
  }
186
147
  free(self);
187
148
  }
188
149
 
189
- void re2_regexp_free(re2_pattern* self) {
150
+ static void re2_regexp_free(re2_pattern* self) {
190
151
  if (self->pattern) {
191
152
  delete self->pattern;
192
153
  }
@@ -195,12 +156,14 @@ void re2_regexp_free(re2_pattern* self) {
195
156
 
196
157
  static VALUE re2_matchdata_allocate(VALUE klass) {
197
158
  re2_matchdata *m;
159
+
198
160
  return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
199
161
  re2_matchdata_free, m);
200
162
  }
201
163
 
202
164
  static VALUE re2_scanner_allocate(VALUE klass) {
203
165
  re2_scanner *c;
166
+
204
167
  return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
205
168
  re2_scanner_free, c);
206
169
  }
@@ -213,7 +176,7 @@ static VALUE re2_scanner_allocate(VALUE klass) {
213
176
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
214
177
  * m.string #=> "bob 123"
215
178
  */
216
- static VALUE re2_matchdata_string(VALUE self) {
179
+ static VALUE re2_matchdata_string(const VALUE self) {
217
180
  re2_matchdata *m;
218
181
  Data_Get_Struct(self, re2_matchdata, m);
219
182
 
@@ -228,7 +191,7 @@ static VALUE re2_matchdata_string(VALUE self) {
228
191
  * c = RE2::Regexp.new('(\d+)').scan("foo")
229
192
  * c.string #=> "foo"
230
193
  */
231
- static VALUE re2_scanner_string(VALUE self) {
194
+ static VALUE re2_scanner_string(const VALUE self) {
232
195
  re2_scanner *c;
233
196
  Data_Get_Struct(self, re2_scanner, c);
234
197
 
@@ -243,7 +206,7 @@ static VALUE re2_scanner_string(VALUE self) {
243
206
  * c = RE2::Regexp.new('(\d+)').scan("foo")
244
207
  * c.eof? #=> true
245
208
  */
246
- static VALUE re2_scanner_eof(VALUE self) {
209
+ static VALUE re2_scanner_eof(const VALUE self) {
247
210
  re2_scanner *c;
248
211
  Data_Get_Struct(self, re2_scanner, c);
249
212
 
@@ -265,7 +228,7 @@ static VALUE re2_scanner_rewind(VALUE self) {
265
228
  re2_scanner *c;
266
229
  Data_Get_Struct(self, re2_scanner, c);
267
230
 
268
- c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text));
231
+ c->input = new(std::nothrow) re2::StringPiece(StringValuePtr(c->text));
269
232
  c->eof = false;
270
233
 
271
234
  return self;
@@ -286,44 +249,41 @@ static VALUE re2_scanner_rewind(VALUE self) {
286
249
  * s.scan #=> ["bar"]
287
250
  */
288
251
  static VALUE re2_scanner_scan(VALUE self) {
289
- int i;
290
- size_t original_input_size, new_input_size;
291
- bool input_advanced;
292
252
  re2_pattern *p;
293
253
  re2_scanner *c;
294
- VALUE result;
295
254
 
296
255
  Data_Get_Struct(self, re2_scanner, c);
297
256
  Data_Get_Struct(c->regexp, re2_pattern, p);
298
257
 
299
- vector<RE2::Arg> argv(c->number_of_capturing_groups);
300
- vector<RE2::Arg*> args(c->number_of_capturing_groups);
301
- vector<string> matches(c->number_of_capturing_groups);
258
+ std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
259
+ std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
260
+ std::vector<std::string> matches(c->number_of_capturing_groups);
302
261
 
303
262
  if (c->eof) {
304
263
  return Qnil;
305
264
  }
306
265
 
307
- original_input_size = c->input->size();
266
+ re2::StringPiece::size_type original_input_size = c->input->size();
308
267
 
309
- for (i = 0; i < c->number_of_capturing_groups; i++) {
268
+ for (int i = 0; i < c->number_of_capturing_groups; ++i) {
310
269
  argv[i] = &matches[i];
311
270
  args[i] = &argv[i];
312
271
  }
313
272
 
314
273
  if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0],
315
274
  c->number_of_capturing_groups)) {
316
- result = rb_ary_new2(c->number_of_capturing_groups);
317
- new_input_size = c->input->size();
318
- input_advanced = new_input_size < original_input_size;
275
+ re2::StringPiece::size_type new_input_size = c->input->size();
276
+ bool input_advanced = new_input_size < original_input_size;
277
+
278
+ VALUE result = rb_ary_new2(c->number_of_capturing_groups);
319
279
 
320
- for (i = 0; i < c->number_of_capturing_groups; i++) {
280
+ for (int i = 0; i < c->number_of_capturing_groups; ++i) {
321
281
  if (matches[i].empty()) {
322
282
  rb_ary_push(result, Qnil);
323
283
  } else {
324
- rb_ary_push(result, ENCODED_STR_NEW(matches[i].data(),
284
+ rb_ary_push(result, encoded_str_new(matches[i].data(),
325
285
  matches[i].size(),
326
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
286
+ p->pattern->options().encoding()));
327
287
  }
328
288
  }
329
289
 
@@ -334,47 +294,40 @@ static VALUE re2_scanner_scan(VALUE self) {
334
294
  if (!input_advanced && new_input_size > 0) {
335
295
  c->input->remove_prefix(1);
336
296
  }
297
+
298
+ return result;
337
299
  } else {
338
- result = Qnil;
300
+ return Qnil;
339
301
  }
340
-
341
- return result;
342
302
  }
343
303
 
344
304
  /*
345
305
  * Retrieve a matchdata by index or name.
346
306
  */
347
- re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
348
- int id;
307
+ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
349
308
  re2_matchdata *m;
350
309
  re2_pattern *p;
351
- map<string, int> groups;
352
- string name;
353
- re2::StringPiece *match;
354
310
 
355
311
  Data_Get_Struct(self, re2_matchdata, m);
356
312
  Data_Get_Struct(m->regexp, re2_pattern, p);
357
313
 
314
+ int id;
315
+
358
316
  if (FIXNUM_P(idx)) {
359
317
  id = FIX2INT(idx);
360
318
  } else {
361
- if (SYMBOL_P(idx)) {
362
- name = rb_id2name(SYM2ID(idx));
363
- } else {
364
- name = StringValuePtr(idx);
365
- }
319
+ const char *name = SYMBOL_P(idx) ? rb_id2name(SYM2ID(idx)) : StringValuePtr(idx);
320
+ const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
366
321
 
367
- groups = p->pattern->NamedCapturingGroups();
368
-
369
- if (groups.count(name) == 1) {
370
- id = groups[name];
322
+ if (std::map<std::string, int>::const_iterator search = groups.find(name); search != groups.end()) {
323
+ id = search->second;
371
324
  } else {
372
325
  return NULL;
373
326
  }
374
327
  }
375
328
 
376
329
  if (id >= 0 && id < m->number_of_matches) {
377
- match = &m->matches[id];
330
+ re2::StringPiece *match = &m->matches[id];
378
331
 
379
332
  if (!match->empty()) {
380
333
  return match;
@@ -393,7 +346,7 @@ re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
393
346
  * m.size #=> 2
394
347
  * m.length #=> 2
395
348
  */
396
- static VALUE re2_matchdata_size(VALUE self) {
349
+ static VALUE re2_matchdata_size(const VALUE self) {
397
350
  re2_matchdata *m;
398
351
  Data_Get_Struct(self, re2_matchdata, m);
399
352
 
@@ -410,23 +363,18 @@ static VALUE re2_matchdata_size(VALUE self) {
410
363
  * m.begin(0) #=> 1
411
364
  * m.begin(1) #=> 4
412
365
  */
413
- static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
366
+ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
414
367
  re2_matchdata *m;
415
- re2_pattern *p;
416
- re2::StringPiece *match;
417
- long offset;
418
368
 
419
369
  Data_Get_Struct(self, re2_matchdata, m);
420
- Data_Get_Struct(m->regexp, re2_pattern, p);
421
370
 
422
- match = re2_matchdata_find_match(n, self);
371
+ re2::StringPiece *match = re2_matchdata_find_match(n, self);
423
372
  if (match == NULL) {
424
373
  return Qnil;
425
374
  } else {
426
- offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text));
375
+ long offset = match->data() - StringValuePtr(m->text);
427
376
 
428
- return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
429
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
377
+ return LONG2NUM(rb_str_sublen(StringValue(m->text), offset));
430
378
  }
431
379
  }
432
380
 
@@ -440,24 +388,18 @@ static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
440
388
  * m.end(0) #=> 9
441
389
  * m.end(1) #=> 7
442
390
  */
443
- static VALUE re2_matchdata_end(VALUE self, VALUE n) {
391
+ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
444
392
  re2_matchdata *m;
445
- re2_pattern *p;
446
- re2::StringPiece *match;
447
- long offset;
448
393
 
449
394
  Data_Get_Struct(self, re2_matchdata, m);
450
- Data_Get_Struct(m->regexp, re2_pattern, p);
451
-
452
- match = re2_matchdata_find_match(n, self);
453
395
 
396
+ re2::StringPiece *match = re2_matchdata_find_match(n, self);
454
397
  if (match == NULL) {
455
398
  return Qnil;
456
399
  } else {
457
- offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text)) + match->size();
400
+ long offset = (match->data() - StringValuePtr(m->text)) + match->size();
458
401
 
459
- return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
460
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
402
+ return LONG2NUM(rb_str_sublen(StringValue(m->text), offset));
461
403
  }
462
404
  }
463
405
 
@@ -469,9 +411,10 @@ static VALUE re2_matchdata_end(VALUE self, VALUE n) {
469
411
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
470
412
  * m.regexp #=> #<RE2::Regexp /(\d+)/>
471
413
  */
472
- static VALUE re2_matchdata_regexp(VALUE self) {
414
+ static VALUE re2_matchdata_regexp(const VALUE self) {
473
415
  re2_matchdata *m;
474
416
  Data_Get_Struct(self, re2_matchdata, m);
417
+
475
418
  return m->regexp;
476
419
  }
477
420
 
@@ -483,7 +426,7 @@ static VALUE re2_matchdata_regexp(VALUE self) {
483
426
  * c = RE2::Regexp.new('(\d+)').scan("bob 123")
484
427
  * c.regexp #=> #<RE2::Regexp /(\d+)/>
485
428
  */
486
- static VALUE re2_scanner_regexp(VALUE self) {
429
+ static VALUE re2_scanner_regexp(const VALUE self) {
487
430
  re2_scanner *c;
488
431
  Data_Get_Struct(self, re2_scanner, c);
489
432
 
@@ -492,6 +435,7 @@ static VALUE re2_scanner_regexp(VALUE self) {
492
435
 
493
436
  static VALUE re2_regexp_allocate(VALUE klass) {
494
437
  re2_pattern *p;
438
+
495
439
  return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
496
440
  }
497
441
 
@@ -507,35 +451,31 @@ static VALUE re2_regexp_allocate(VALUE klass) {
507
451
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
508
452
  * m.to_a #=> ["123", "123"]
509
453
  */
510
- static VALUE re2_matchdata_to_a(VALUE self) {
511
- int i;
454
+ static VALUE re2_matchdata_to_a(const VALUE self) {
512
455
  re2_matchdata *m;
513
456
  re2_pattern *p;
514
- re2::StringPiece *match;
515
- VALUE array;
516
457
 
517
458
  Data_Get_Struct(self, re2_matchdata, m);
518
459
  Data_Get_Struct(m->regexp, re2_pattern, p);
519
460
 
520
- array = rb_ary_new2(m->number_of_matches);
521
- for (i = 0; i < m->number_of_matches; i++) {
522
- match = &m->matches[i];
461
+ VALUE array = rb_ary_new2(m->number_of_matches);
462
+ for (int i = 0; i < m->number_of_matches; ++i) {
463
+ re2::StringPiece *match = &m->matches[i];
523
464
 
524
465
  if (match->empty()) {
525
466
  rb_ary_push(array, Qnil);
526
467
  } else {
527
- rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
528
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
468
+ rb_ary_push(array, encoded_str_new(match->data(), match->size(),
469
+ p->pattern->options().encoding()));
529
470
  }
530
471
  }
531
472
 
532
473
  return array;
533
474
  }
534
475
 
535
- static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
476
+ static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
536
477
  re2_matchdata *m;
537
478
  re2_pattern *p;
538
- re2::StringPiece *match;
539
479
 
540
480
  Data_Get_Struct(self, re2_matchdata, m);
541
481
  Data_Get_Struct(m->regexp, re2_pattern, p);
@@ -543,32 +483,28 @@ static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
543
483
  if (nth < 0 || nth >= m->number_of_matches) {
544
484
  return Qnil;
545
485
  } else {
546
- match = &m->matches[nth];
486
+ re2::StringPiece *match = &m->matches[nth];
547
487
 
548
488
  if (match->empty()) {
549
489
  return Qnil;
550
490
  } else {
551
- return ENCODED_STR_NEW(match->data(), match->size(),
552
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
491
+ return encoded_str_new(match->data(), match->size(),
492
+ p->pattern->options().encoding());
553
493
  }
554
494
  }
555
495
  }
556
496
 
557
- static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
558
- int idx;
497
+ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
559
498
  re2_matchdata *m;
560
499
  re2_pattern *p;
561
- map<string, int> groups;
562
- string name_as_string(name);
563
500
 
564
501
  Data_Get_Struct(self, re2_matchdata, m);
565
502
  Data_Get_Struct(m->regexp, re2_pattern, p);
566
503
 
567
- groups = p->pattern->NamedCapturingGroups();
504
+ const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
568
505
 
569
- if (groups.count(name_as_string) == 1) {
570
- idx = groups[name_as_string];
571
- return re2_matchdata_nth_match(idx, self);
506
+ if (std::map<std::string, int>::const_iterator search = groups.find(name); search != groups.end()) {
507
+ return re2_matchdata_nth_match(search->second, self);
572
508
  } else {
573
509
  return Qnil;
574
510
  }
@@ -621,7 +557,7 @@ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
621
557
  * m["number"] #=> "123"
622
558
  * m[:number] #=> "123"
623
559
  */
624
- static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
560
+ static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
625
561
  VALUE idx, rest;
626
562
  rb_scan_args(argc, argv, "11", &idx, &rest);
627
563
 
@@ -641,7 +577,7 @@ static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
641
577
  *
642
578
  * @return [String] the entire matched string
643
579
  */
644
- static VALUE re2_matchdata_to_s(VALUE self) {
580
+ static VALUE re2_matchdata_to_s(const VALUE self) {
645
581
  return re2_matchdata_nth_match(0, self);
646
582
  }
647
583
 
@@ -657,26 +593,24 @@ static VALUE re2_matchdata_to_s(VALUE self) {
657
593
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
658
594
  * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
659
595
  */
660
- static VALUE re2_matchdata_inspect(VALUE self) {
661
- int i;
596
+ static VALUE re2_matchdata_inspect(const VALUE self) {
662
597
  re2_matchdata *m;
663
598
  re2_pattern *p;
664
- VALUE match, result;
665
- ostringstream output;
666
599
 
667
600
  Data_Get_Struct(self, re2_matchdata, m);
668
601
  Data_Get_Struct(m->regexp, re2_pattern, p);
669
602
 
603
+ std::ostringstream output;
670
604
  output << "#<RE2::MatchData";
671
605
 
672
- for (i = 0; i < m->number_of_matches; i++) {
606
+ for (int i = 0; i < m->number_of_matches; ++i) {
673
607
  output << " ";
674
608
 
675
609
  if (i > 0) {
676
610
  output << i << ":";
677
611
  }
678
612
 
679
- match = re2_matchdata_nth_match(i, self);
613
+ VALUE match = re2_matchdata_nth_match(i, self);
680
614
 
681
615
  if (match == Qnil) {
682
616
  output << "nil";
@@ -687,10 +621,8 @@ static VALUE re2_matchdata_inspect(VALUE self) {
687
621
 
688
622
  output << ">";
689
623
 
690
- result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
691
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
692
-
693
- return result;
624
+ return encoded_str_new(output.str().data(), output.str().length(),
625
+ p->pattern->options().encoding());
694
626
  }
695
627
 
696
628
  /*
@@ -713,25 +645,22 @@ static VALUE re2_matchdata_inspect(VALUE self) {
713
645
  * puts "Unrecognised match"
714
646
  * end
715
647
  */
716
- static VALUE re2_matchdata_deconstruct(VALUE self) {
717
- int i;
648
+ static VALUE re2_matchdata_deconstruct(const VALUE self) {
718
649
  re2_matchdata *m;
719
650
  re2_pattern *p;
720
- re2::StringPiece *match;
721
- VALUE array;
722
651
 
723
652
  Data_Get_Struct(self, re2_matchdata, m);
724
653
  Data_Get_Struct(m->regexp, re2_pattern, p);
725
654
 
726
- array = rb_ary_new2(m->number_of_matches - 1);
727
- for (i = 1; i < m->number_of_matches; i++) {
728
- match = &m->matches[i];
655
+ VALUE array = rb_ary_new2(m->number_of_matches - 1);
656
+ for (int i = 1; i < m->number_of_matches; ++i) {
657
+ re2::StringPiece *match = &m->matches[i];
729
658
 
730
659
  if (match->empty()) {
731
660
  rb_ary_push(array, Qnil);
732
661
  } else {
733
- rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
734
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
662
+ rb_ary_push(array, encoded_str_new(match->data(), match->size(),
663
+ p->pattern->options().encoding()));
735
664
  }
736
665
  }
737
666
 
@@ -766,40 +695,36 @@ static VALUE re2_matchdata_deconstruct(VALUE self) {
766
695
  * puts "Unrecognised match"
767
696
  * end
768
697
  */
769
- static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
770
- int i;
771
- VALUE capturing_groups, key;
698
+ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys) {
772
699
  re2_matchdata *m;
773
700
  re2_pattern *p;
774
- map<string, int> groups;
775
- map<string, int>::iterator iterator;
776
701
 
777
702
  Data_Get_Struct(self, re2_matchdata, m);
778
703
  Data_Get_Struct(m->regexp, re2_pattern, p);
779
704
 
780
- groups = p->pattern->NamedCapturingGroups();
781
- capturing_groups = rb_hash_new();
705
+ const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
706
+ VALUE capturing_groups = rb_hash_new();
782
707
 
783
708
  if (NIL_P(keys)) {
784
- for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
709
+ for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
785
710
  rb_hash_aset(capturing_groups,
786
- ID2SYM(rb_intern(iterator->first.data())),
787
- re2_matchdata_nth_match(iterator->second, self));
711
+ ID2SYM(rb_intern(it->first.data())),
712
+ re2_matchdata_nth_match(it->second, self));
788
713
  }
789
714
  } else {
790
715
  Check_Type(keys, T_ARRAY);
791
716
 
792
717
  if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
793
- for (i = 0; i < RARRAY_LEN(keys); i++) {
794
- key = rb_ary_entry(keys, i);
718
+ for (int i = 0; i < RARRAY_LEN(keys); ++i) {
719
+ VALUE key = rb_ary_entry(keys, i);
795
720
  Check_Type(key, T_SYMBOL);
796
- string name(rb_id2name(SYM2ID(key)));
721
+ const char *name = rb_id2name(SYM2ID(key));
797
722
 
798
- if (groups.count(name) == 0) {
723
+ if (std::map<std::string, int>::const_iterator search = groups.find(name); search != groups.end()) {
724
+ rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(search->second, self));
725
+ } else {
799
726
  break;
800
727
  }
801
-
802
- rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(groups[name], self));
803
728
  }
804
729
  }
805
730
  }
@@ -816,6 +741,7 @@ static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
816
741
  */
817
742
  static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
818
743
  UNUSED(self);
744
+
819
745
  return rb_class_new_instance(argc, argv, re2_cRegexp);
820
746
  }
821
747
 
@@ -863,11 +789,11 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
863
789
 
864
790
  if (RTEST(options)) {
865
791
  RE2::Options re2_options;
866
- parse_re2_options(re2_options, options);
792
+ parse_re2_options(&re2_options, options);
867
793
 
868
- p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
794
+ p->pattern = new(std::nothrow) RE2(StringValuePtr(pattern), re2_options);
869
795
  } else {
870
- p->pattern = new(nothrow) RE2(StringValuePtr(pattern));
796
+ p->pattern = new(std::nothrow) RE2(StringValuePtr(pattern));
871
797
  }
872
798
 
873
799
  if (p->pattern == 0) {
@@ -889,19 +815,17 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
889
815
  * re2 = RE2::Regexp.new("woo?")
890
816
  * re2.inspect #=> "#<RE2::Regexp /woo?/>"
891
817
  */
892
- static VALUE re2_regexp_inspect(VALUE self) {
818
+ static VALUE re2_regexp_inspect(const VALUE self) {
893
819
  re2_pattern *p;
894
- VALUE result;
895
- ostringstream output;
896
820
 
897
821
  Data_Get_Struct(self, re2_pattern, p);
898
822
 
899
- output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
823
+ std::ostringstream output;
900
824
 
901
- result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
902
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
825
+ output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
903
826
 
904
- return result;
827
+ return encoded_str_new(output.str().data(), output.str().length(),
828
+ p->pattern->options().encoding());
905
829
  }
906
830
 
907
831
  /*
@@ -916,12 +840,13 @@ static VALUE re2_regexp_inspect(VALUE self) {
916
840
  * re2 = RE2::Regexp.new("woo?")
917
841
  * re2.to_s #=> "woo?"
918
842
  */
919
- static VALUE re2_regexp_to_s(VALUE self) {
843
+ static VALUE re2_regexp_to_s(const VALUE self) {
920
844
  re2_pattern *p;
921
845
  Data_Get_Struct(self, re2_pattern, p);
922
- return ENCODED_STR_NEW(p->pattern->pattern().data(),
846
+
847
+ return encoded_str_new(p->pattern->pattern().data(),
923
848
  p->pattern->pattern().size(),
924
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
849
+ p->pattern->options().encoding());
925
850
  }
926
851
 
927
852
  /*
@@ -933,9 +858,10 @@ static VALUE re2_regexp_to_s(VALUE self) {
933
858
  * re2 = RE2::Regexp.new("woo?")
934
859
  * re2.ok? #=> true
935
860
  */
936
- static VALUE re2_regexp_ok(VALUE self) {
861
+ static VALUE re2_regexp_ok(const VALUE self) {
937
862
  re2_pattern *p;
938
863
  Data_Get_Struct(self, re2_pattern, p);
864
+
939
865
  return BOOL2RUBY(p->pattern->ok());
940
866
  }
941
867
 
@@ -948,9 +874,10 @@ static VALUE re2_regexp_ok(VALUE self) {
948
874
  * re2 = RE2::Regexp.new("woo?", :utf8 => true)
949
875
  * re2.utf8? #=> true
950
876
  */
951
- static VALUE re2_regexp_utf8(VALUE self) {
877
+ static VALUE re2_regexp_utf8(const VALUE self) {
952
878
  re2_pattern *p;
953
879
  Data_Get_Struct(self, re2_pattern, p);
880
+
954
881
  return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
955
882
  }
956
883
 
@@ -963,9 +890,10 @@ static VALUE re2_regexp_utf8(VALUE self) {
963
890
  * re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
964
891
  * re2.posix_syntax? #=> true
965
892
  */
966
- static VALUE re2_regexp_posix_syntax(VALUE self) {
893
+ static VALUE re2_regexp_posix_syntax(const VALUE self) {
967
894
  re2_pattern *p;
968
895
  Data_Get_Struct(self, re2_pattern, p);
896
+
969
897
  return BOOL2RUBY(p->pattern->options().posix_syntax());
970
898
  }
971
899
 
@@ -978,9 +906,10 @@ static VALUE re2_regexp_posix_syntax(VALUE self) {
978
906
  * re2 = RE2::Regexp.new("woo?", :longest_match => true)
979
907
  * re2.longest_match? #=> true
980
908
  */
981
- static VALUE re2_regexp_longest_match(VALUE self) {
909
+ static VALUE re2_regexp_longest_match(const VALUE self) {
982
910
  re2_pattern *p;
983
911
  Data_Get_Struct(self, re2_pattern, p);
912
+
984
913
  return BOOL2RUBY(p->pattern->options().longest_match());
985
914
  }
986
915
 
@@ -993,9 +922,10 @@ static VALUE re2_regexp_longest_match(VALUE self) {
993
922
  * re2 = RE2::Regexp.new("woo?", :log_errors => true)
994
923
  * re2.log_errors? #=> true
995
924
  */
996
- static VALUE re2_regexp_log_errors(VALUE self) {
925
+ static VALUE re2_regexp_log_errors(const VALUE self) {
997
926
  re2_pattern *p;
998
927
  Data_Get_Struct(self, re2_pattern, p);
928
+
999
929
  return BOOL2RUBY(p->pattern->options().log_errors());
1000
930
  }
1001
931
 
@@ -1008,9 +938,10 @@ static VALUE re2_regexp_log_errors(VALUE self) {
1008
938
  * re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
1009
939
  * re2.max_mem #=> 1024
1010
940
  */
1011
- static VALUE re2_regexp_max_mem(VALUE self) {
941
+ static VALUE re2_regexp_max_mem(const VALUE self) {
1012
942
  re2_pattern *p;
1013
943
  Data_Get_Struct(self, re2_pattern, p);
944
+
1014
945
  return INT2FIX(p->pattern->options().max_mem());
1015
946
  }
1016
947
 
@@ -1023,9 +954,10 @@ static VALUE re2_regexp_max_mem(VALUE self) {
1023
954
  * re2 = RE2::Regexp.new("woo?", :literal => true)
1024
955
  * re2.literal? #=> true
1025
956
  */
1026
- static VALUE re2_regexp_literal(VALUE self) {
957
+ static VALUE re2_regexp_literal(const VALUE self) {
1027
958
  re2_pattern *p;
1028
959
  Data_Get_Struct(self, re2_pattern, p);
960
+
1029
961
  return BOOL2RUBY(p->pattern->options().literal());
1030
962
  }
1031
963
 
@@ -1038,9 +970,10 @@ static VALUE re2_regexp_literal(VALUE self) {
1038
970
  * re2 = RE2::Regexp.new("woo?", :never_nl => true)
1039
971
  * re2.never_nl? #=> true
1040
972
  */
1041
- static VALUE re2_regexp_never_nl(VALUE self) {
973
+ static VALUE re2_regexp_never_nl(const VALUE self) {
1042
974
  re2_pattern *p;
1043
975
  Data_Get_Struct(self, re2_pattern, p);
976
+
1044
977
  return BOOL2RUBY(p->pattern->options().never_nl());
1045
978
  }
1046
979
 
@@ -1053,9 +986,10 @@ static VALUE re2_regexp_never_nl(VALUE self) {
1053
986
  * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
1054
987
  * re2.case_sensitive? #=> true
1055
988
  */
1056
- static VALUE re2_regexp_case_sensitive(VALUE self) {
989
+ static VALUE re2_regexp_case_sensitive(const VALUE self) {
1057
990
  re2_pattern *p;
1058
991
  Data_Get_Struct(self, re2_pattern, p);
992
+
1059
993
  return BOOL2RUBY(p->pattern->options().case_sensitive());
1060
994
  }
1061
995
 
@@ -1069,7 +1003,7 @@ static VALUE re2_regexp_case_sensitive(VALUE self) {
1069
1003
  * re2.case_insensitive? #=> false
1070
1004
  * re2.casefold? #=> false
1071
1005
  */
1072
- static VALUE re2_regexp_case_insensitive(VALUE self) {
1006
+ static VALUE re2_regexp_case_insensitive(const VALUE self) {
1073
1007
  return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
1074
1008
  }
1075
1009
 
@@ -1082,9 +1016,10 @@ static VALUE re2_regexp_case_insensitive(VALUE self) {
1082
1016
  * re2 = RE2::Regexp.new("woo?", :perl_classes => true)
1083
1017
  * re2.perl_classes? #=> true
1084
1018
  */
1085
- static VALUE re2_regexp_perl_classes(VALUE self) {
1019
+ static VALUE re2_regexp_perl_classes(const VALUE self) {
1086
1020
  re2_pattern *p;
1087
1021
  Data_Get_Struct(self, re2_pattern, p);
1022
+
1088
1023
  return BOOL2RUBY(p->pattern->options().perl_classes());
1089
1024
  }
1090
1025
 
@@ -1097,9 +1032,10 @@ static VALUE re2_regexp_perl_classes(VALUE self) {
1097
1032
  * re2 = RE2::Regexp.new("woo?", :word_boundary => true)
1098
1033
  * re2.word_boundary? #=> true
1099
1034
  */
1100
- static VALUE re2_regexp_word_boundary(VALUE self) {
1035
+ static VALUE re2_regexp_word_boundary(const VALUE self) {
1101
1036
  re2_pattern *p;
1102
1037
  Data_Get_Struct(self, re2_pattern, p);
1038
+
1103
1039
  return BOOL2RUBY(p->pattern->options().word_boundary());
1104
1040
  }
1105
1041
 
@@ -1112,9 +1048,10 @@ static VALUE re2_regexp_word_boundary(VALUE self) {
1112
1048
  * re2 = RE2::Regexp.new("woo?", :one_line => true)
1113
1049
  * re2.one_line? #=> true
1114
1050
  */
1115
- static VALUE re2_regexp_one_line(VALUE self) {
1051
+ static VALUE re2_regexp_one_line(const VALUE self) {
1116
1052
  re2_pattern *p;
1117
1053
  Data_Get_Struct(self, re2_pattern, p);
1054
+
1118
1055
  return BOOL2RUBY(p->pattern->options().one_line());
1119
1056
  }
1120
1057
 
@@ -1124,9 +1061,10 @@ static VALUE re2_regexp_one_line(VALUE self) {
1124
1061
  *
1125
1062
  * @return [String, nil] the error string or nil
1126
1063
  */
1127
- static VALUE re2_regexp_error(VALUE self) {
1064
+ static VALUE re2_regexp_error(const VALUE self) {
1128
1065
  re2_pattern *p;
1129
1066
  Data_Get_Struct(self, re2_pattern, p);
1067
+
1130
1068
  if (p->pattern->ok()) {
1131
1069
  return Qnil;
1132
1070
  } else {
@@ -1144,15 +1082,16 @@ static VALUE re2_regexp_error(VALUE self) {
1144
1082
  *
1145
1083
  * @return [String, nil] the offending portion of the regexp or nil
1146
1084
  */
1147
- static VALUE re2_regexp_error_arg(VALUE self) {
1085
+ static VALUE re2_regexp_error_arg(const VALUE self) {
1148
1086
  re2_pattern *p;
1149
1087
  Data_Get_Struct(self, re2_pattern, p);
1088
+
1150
1089
  if (p->pattern->ok()) {
1151
1090
  return Qnil;
1152
1091
  } else {
1153
- return ENCODED_STR_NEW(p->pattern->error_arg().data(),
1092
+ return encoded_str_new(p->pattern->error_arg().data(),
1154
1093
  p->pattern->error_arg().size(),
1155
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1094
+ p->pattern->options().encoding());
1156
1095
  }
1157
1096
  }
1158
1097
 
@@ -1163,9 +1102,10 @@ static VALUE re2_regexp_error_arg(VALUE self) {
1163
1102
  *
1164
1103
  * @return [Integer] the regexp "cost"
1165
1104
  */
1166
- static VALUE re2_regexp_program_size(VALUE self) {
1105
+ static VALUE re2_regexp_program_size(const VALUE self) {
1167
1106
  re2_pattern *p;
1168
1107
  Data_Get_Struct(self, re2_pattern, p);
1108
+
1169
1109
  return INT2FIX(p->pattern->ProgramSize());
1170
1110
  }
1171
1111
 
@@ -1175,12 +1115,11 @@ static VALUE re2_regexp_program_size(VALUE self) {
1175
1115
  *
1176
1116
  * @return [Hash] the options
1177
1117
  */
1178
- static VALUE re2_regexp_options(VALUE self) {
1179
- VALUE options;
1118
+ static VALUE re2_regexp_options(const VALUE self) {
1180
1119
  re2_pattern *p;
1181
1120
 
1182
1121
  Data_Get_Struct(self, re2_pattern, p);
1183
- options = rb_hash_new();
1122
+ VALUE options = rb_hash_new();
1184
1123
 
1185
1124
  rb_hash_aset(options, ID2SYM(id_utf8),
1186
1125
  BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8));
@@ -1228,10 +1167,10 @@ static VALUE re2_regexp_options(VALUE self) {
1228
1167
  *
1229
1168
  * @return [Integer] the number of capturing subpatterns
1230
1169
  */
1231
- static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
1170
+ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
1232
1171
  re2_pattern *p;
1233
-
1234
1172
  Data_Get_Struct(self, re2_pattern, p);
1173
+
1235
1174
  return INT2FIX(p->pattern->NumberOfCapturingGroups());
1236
1175
  }
1237
1176
 
@@ -1244,21 +1183,18 @@ static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
1244
1183
  *
1245
1184
  * @return [Hash] a hash of names to capturing indices
1246
1185
  */
1247
- static VALUE re2_regexp_named_capturing_groups(VALUE self) {
1248
- VALUE capturing_groups;
1186
+ static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
1249
1187
  re2_pattern *p;
1250
- map<string, int> groups;
1251
- map<string, int>::iterator iterator;
1252
1188
 
1253
1189
  Data_Get_Struct(self, re2_pattern, p);
1254
- groups = p->pattern->NamedCapturingGroups();
1255
- capturing_groups = rb_hash_new();
1190
+ const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
1191
+ VALUE capturing_groups = rb_hash_new();
1256
1192
 
1257
- for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
1193
+ for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
1258
1194
  rb_hash_aset(capturing_groups,
1259
- ENCODED_STR_NEW(iterator->first.data(), iterator->first.size(),
1260
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"),
1261
- INT2FIX(iterator->second));
1195
+ encoded_str_new(it->first.data(), it->first.size(),
1196
+ p->pattern->options().encoding()),
1197
+ INT2FIX(it->second));
1262
1198
  }
1263
1199
 
1264
1200
  return capturing_groups;
@@ -1316,20 +1252,20 @@ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
1316
1252
  * r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
1317
1253
  * r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
1318
1254
  */
1319
- static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1320
- int n;
1321
- bool matched;
1255
+ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1322
1256
  re2_pattern *p;
1323
1257
  re2_matchdata *m;
1324
- VALUE text, number_of_matches, matchdata;
1258
+ VALUE text, number_of_matches;
1325
1259
 
1326
1260
  rb_scan_args(argc, argv, "11", &text, &number_of_matches);
1327
1261
 
1328
1262
  /* Ensure text is a string. */
1329
- text = StringValue(text);
1263
+ StringValue(text);
1330
1264
 
1331
1265
  Data_Get_Struct(self, re2_pattern, p);
1332
1266
 
1267
+ int n;
1268
+
1333
1269
  if (RTEST(number_of_matches)) {
1334
1270
  n = NUM2INT(number_of_matches);
1335
1271
 
@@ -1345,17 +1281,21 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1345
1281
  }
1346
1282
 
1347
1283
  if (n == 0) {
1348
- matched = match(p->pattern, StringValuePtr(text), 0,
1349
- static_cast<int>(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0);
1284
+ #ifdef HAVE_ENDPOS_ARGUMENT
1285
+ bool matched = p->pattern->Match(StringValuePtr(text), 0,
1286
+ RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
1287
+ #else
1288
+ bool matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED,
1289
+ 0, 0);
1290
+ #endif
1350
1291
  return BOOL2RUBY(matched);
1351
1292
  } else {
1352
-
1353
1293
  /* Because match returns the whole match as well. */
1354
1294
  n += 1;
1355
1295
 
1356
- matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1296
+ VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1357
1297
  Data_Get_Struct(matchdata, re2_matchdata, m);
1358
- m->matches = new(nothrow) re2::StringPiece[n];
1298
+ m->matches = new(std::nothrow) re2::StringPiece[n];
1359
1299
  m->regexp = self;
1360
1300
  m->text = rb_str_dup(text);
1361
1301
  rb_str_freeze(m->text);
@@ -1367,10 +1307,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1367
1307
 
1368
1308
  m->number_of_matches = n;
1369
1309
 
1370
- matched = match(p->pattern, StringValuePtr(m->text), 0,
1371
- static_cast<int>(RSTRING_LEN(m->text)),
1372
- RE2::UNANCHORED, m->matches, n);
1373
-
1310
+ #ifdef HAVE_ENDPOS_ARGUMENT
1311
+ bool matched = p->pattern->Match(StringValuePtr(m->text), 0,
1312
+ RSTRING_LEN(m->text), RE2::UNANCHORED, m->matches, n);
1313
+ #else
1314
+ bool matched = p->pattern->Match(StringValuePtr(m->text), 0,
1315
+ RE2::UNANCHORED, m->matches, n);
1316
+ #endif
1374
1317
  if (matched) {
1375
1318
  return matchdata;
1376
1319
  } else {
@@ -1385,10 +1328,8 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1385
1328
  *
1386
1329
  * @return [Boolean] whether the match was successful
1387
1330
  */
1388
- static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
1389
- VALUE argv[2];
1390
- argv[0] = text;
1391
- argv[1] = INT2FIX(0);
1331
+ static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
1332
+ VALUE argv[2] = { text, INT2FIX(0) };
1392
1333
 
1393
1334
  return re2_regexp_match(2, argv, self);
1394
1335
  }
@@ -1399,16 +1340,15 @@ static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
1399
1340
  * @example
1400
1341
  * c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
1401
1342
  */
1402
- static VALUE re2_regexp_scan(VALUE self, VALUE text) {
1343
+ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
1403
1344
  re2_pattern *p;
1404
1345
  re2_scanner *c;
1405
- VALUE scanner;
1406
1346
 
1407
1347
  Data_Get_Struct(self, re2_pattern, p);
1408
- scanner = rb_class_new_instance(0, 0, re2_cScanner);
1348
+ VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
1409
1349
  Data_Get_Struct(scanner, re2_scanner, c);
1410
1350
 
1411
- c->input = new(nothrow) re2::StringPiece(StringValuePtr(text));
1351
+ c->input = new(std::nothrow) re2::StringPiece(StringValuePtr(text));
1412
1352
  c->regexp = self;
1413
1353
  c->text = text;
1414
1354
 
@@ -1448,23 +1388,21 @@ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
1448
1388
  /* Take a copy of str so it can be modified in-place by
1449
1389
  * RE2::Replace.
1450
1390
  */
1451
- string str_as_string(StringValuePtr(str));
1391
+ std::string str_as_string(StringValuePtr(str));
1452
1392
 
1453
1393
  /* Do the replacement. */
1454
1394
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1455
1395
  Data_Get_Struct(pattern, re2_pattern, p);
1456
1396
  RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1457
1397
 
1458
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1459
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1398
+ return encoded_str_new(str_as_string.data(), str_as_string.size(),
1399
+ p->pattern->options().encoding());
1460
1400
  } else {
1461
1401
  RE2::Replace(&str_as_string, StringValuePtr(pattern),
1462
1402
  StringValuePtr(rewrite));
1463
1403
 
1464
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1465
- "UTF-8");
1404
+ return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
1466
1405
  }
1467
-
1468
1406
  }
1469
1407
 
1470
1408
  /*
@@ -1491,21 +1429,20 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
1491
1429
  * RE2::GlobalReplace.
1492
1430
  */
1493
1431
  re2_pattern *p;
1494
- string str_as_string(StringValuePtr(str));
1432
+ std::string str_as_string(StringValuePtr(str));
1495
1433
 
1496
1434
  /* Do the replacement. */
1497
1435
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1498
1436
  Data_Get_Struct(pattern, re2_pattern, p);
1499
1437
  RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1500
1438
 
1501
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1502
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1439
+ return encoded_str_new(str_as_string.data(), str_as_string.size(),
1440
+ p->pattern->options().encoding());
1503
1441
  } else {
1504
1442
  RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
1505
1443
  StringValuePtr(rewrite));
1506
1444
 
1507
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1508
- "UTF-8");
1445
+ return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
1509
1446
  }
1510
1447
  }
1511
1448
 
@@ -1521,11 +1458,12 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
1521
1458
  */
1522
1459
  static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
1523
1460
  UNUSED(self);
1524
- string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
1461
+ std::string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
1462
+
1525
1463
  return rb_str_new(quoted_string.data(), quoted_string.size());
1526
1464
  }
1527
1465
 
1528
- void re2_set_free(re2_set *self) {
1466
+ static void re2_set_free(re2_set *self) {
1529
1467
  if (self->set) {
1530
1468
  delete self->set;
1531
1469
  }
@@ -1535,6 +1473,7 @@ void re2_set_free(re2_set *self) {
1535
1473
  static VALUE re2_set_allocate(VALUE klass) {
1536
1474
  re2_set *s;
1537
1475
  VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
1476
+
1538
1477
  return result;
1539
1478
  }
1540
1479
 
@@ -1582,18 +1521,16 @@ static VALUE re2_set_allocate(VALUE klass) {
1582
1521
  static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1583
1522
  VALUE anchor, options;
1584
1523
  re2_set *s;
1585
- RE2::Anchor re2_anchor;
1524
+ RE2::Anchor re2_anchor = RE2::UNANCHORED;
1586
1525
  RE2::Options re2_options;
1587
1526
 
1588
1527
  rb_scan_args(argc, argv, "02", &anchor, &options);
1589
1528
  Data_Get_Struct(self, re2_set, s);
1590
1529
 
1591
1530
  if (RTEST(options)) {
1592
- parse_re2_options(re2_options, options);
1531
+ parse_re2_options(&re2_options, options);
1593
1532
  }
1594
- if (NIL_P(anchor)) {
1595
- re2_anchor = RE2::UNANCHORED;
1596
- } else {
1533
+ if (!NIL_P(anchor)) {
1597
1534
  Check_Type(anchor, T_SYMBOL);
1598
1535
  ID id_anchor = SYM2ID(anchor);
1599
1536
  if (id_anchor == id_unanchored) {
@@ -1607,7 +1544,7 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1607
1544
  }
1608
1545
  }
1609
1546
 
1610
- s->set = new(nothrow) RE2::Set(re2_options, re2_anchor);
1547
+ s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
1611
1548
  if (s->set == 0) {
1612
1549
  rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
1613
1550
  }
@@ -1715,26 +1652,27 @@ static VALUE re2_set_match_raises_errors_p(VALUE self) {
1715
1652
  * set.compile
1716
1653
  * set.match("abcdef", :exception => true) # => [0, 1]
1717
1654
  */
1718
- static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1719
- VALUE str, options, exception_option;
1655
+ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
1656
+ VALUE str, options;
1720
1657
  bool raise_exception = true;
1721
1658
  rb_scan_args(argc, argv, "11", &str, &options);
1722
1659
 
1723
1660
  StringValue(str);
1724
1661
  re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
1725
- std::vector<int> v;
1726
1662
  re2_set *s;
1727
1663
  Data_Get_Struct(self, re2_set, s);
1728
1664
 
1729
1665
  if (RTEST(options)) {
1730
1666
  Check_Type(options, T_HASH);
1731
1667
 
1732
- exception_option = rb_hash_aref(options, ID2SYM(id_exception));
1668
+ VALUE exception_option = rb_hash_aref(options, ID2SYM(id_exception));
1733
1669
  if (!NIL_P(exception_option)) {
1734
1670
  raise_exception = RTEST(exception_option);
1735
1671
  }
1736
1672
  }
1737
1673
 
1674
+ std::vector<int> v;
1675
+
1738
1676
  if (raise_exception) {
1739
1677
  #ifdef HAVE_ERROR_INFO_ARGUMENT
1740
1678
  RE2::Set::ErrorInfo e;
@@ -1755,7 +1693,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1755
1693
  rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
1756
1694
  }
1757
1695
  } else {
1758
- for (size_t i = 0; i < v.size(); i++) {
1696
+ for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
1759
1697
  rb_ary_push(result, INT2FIX(v[i]));
1760
1698
  }
1761
1699
  }
@@ -1769,7 +1707,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1769
1707
  VALUE result = rb_ary_new2(v.size());
1770
1708
 
1771
1709
  if (matched) {
1772
- for (size_t i = 0; i < v.size(); i++) {
1710
+ for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
1773
1711
  rb_ary_push(result, INT2FIX(v[i]));
1774
1712
  }
1775
1713
  }
@@ -1919,7 +1857,7 @@ void Init_re2(void) {
1919
1857
  rb_define_singleton_method(re2_cRegexp, "compile",
1920
1858
  RUBY_METHOD_FUNC(rb_class_new_instance), -1);
1921
1859
 
1922
- rb_define_global_function("RE2", RUBY_METHOD_FUNC(re2_re2), -1);
1860
+ rb_define_module_function(rb_mKernel, "RE2", RUBY_METHOD_FUNC(re2_re2), -1);
1923
1861
 
1924
1862
  /* Create the symbols used in options. */
1925
1863
  id_utf8 = rb_intern("utf8");
data/lib/2.6/re2.so CHANGED
Binary file
data/lib/2.7/re2.so CHANGED
Binary file
data/lib/3.0/re2.so CHANGED
Binary file
data/lib/3.1/re2.so CHANGED
Binary file
data/lib/3.2/re2.so CHANGED
Binary file
data/lib/re2/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RE2
4
- VERSION = "2.1.0"
4
+ VERSION = "2.1.1"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0
4
+ version: 2.1.1
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Paul Mucur
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2023-09-16 00:00:00.000000000 Z
12
+ date: 2023-09-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake-compiler