re2 2.1.0-aarch64-linux → 2.1.1-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 45947f6373b11e837536d051bdc98220f9f5250978084a022d6451a5db79523e
4
- data.tar.gz: 16cb5427d672e322830487536edf450bb05af54ab37e5b981b779f1e3ce763f2
3
+ metadata.gz: 67725fc0aae760fd998781e242784be8ad5e9b90de9eb8b08d2c07e615ea1e8c
4
+ data.tar.gz: 67417a6fe6a337403021f5da423c00f271cd431320937688919a0aa0c3011d28
5
5
  SHA512:
6
- metadata.gz: 9dfaa89933d17387e550481a9c53d49df3d5d533fe77ed697a9d0b173cee324e30439e97976f65025e69c81d8f458db313a90abd0fd34d21d171c2c5294197d5
7
- data.tar.gz: d028c072c3fc49072cd4f69d53c25e8167e5ebf203ed27ede7f42dace3aa3b396eece4cb7c83081aacf12836340d0f971005e9639ebcbba0353a46a322809379
6
+ metadata.gz: 0133032a93486e03a7016ab346fd8efadfc6ba45da6ebc5642a3a314112cd35e08456e2314bd95b2d20fd67cef33f038319563d3f1ae06a299cbc29b80f6719c
7
+ data.tar.gz: 333122a551f33aea3e99e2e4758aee43d8c37f04e0f4eeca5b0ec566ffca2d7ef975b745beacda78bb736350a16ec762e3de2f54d151ecceb0507b96774f03c4
data/README.md CHANGED
@@ -5,7 +5,7 @@ Ruby bindings to [RE2][], a "fast, safe, thread-friendly alternative to
5
5
  backtracking regular expression engines like those used in PCRE, Perl, and
6
6
  Python".
7
7
 
8
- **Current version:** 2.1.0
8
+ **Current version:** 2.1.1
9
9
  **Supported Ruby versions:** 2.6, 2.7, 3.0, 3.1, 3.2
10
10
  **Bundled RE2 version:** libre2.11 (2023-09-01)
11
11
  **Supported RE2 versions:** libre2.0 (< 2020-03-02), libre2.1 (2020-03-02), libre2.6 (2020-03-03), libre2.7 (2020-05-01), libre2.8 (2020-07-06), libre2.9 (2020-11-01), libre2.10 (2022-12-01), libre2.11 (2023-07-01)
data/ext/re2/extconf.rb CHANGED
@@ -128,7 +128,6 @@ def build_extension(static_p = false)
128
128
 
129
129
  have_library("stdc++")
130
130
  have_header("stdint.h")
131
- have_func("rb_str_sublen")
132
131
 
133
132
  if !static_p and !have_library("re2")
134
133
  abort "You must have re2 installed and specified with --with-re2-dir, please see https://github.com/google/re2/wiki/Install"
data/ext/re2/re2.cc CHANGED
@@ -6,68 +6,21 @@
6
6
  * Released under the BSD Licence, please see LICENSE.txt
7
7
  */
8
8
 
9
- #include <ruby.h>
10
- #include <re2/re2.h>
11
- #include <re2/set.h>
12
9
  #include <stdint.h>
13
- #include <string>
10
+
11
+ #include <map>
14
12
  #include <sstream>
13
+ #include <string>
15
14
  #include <vector>
16
- using std::string;
17
- using std::ostringstream;
18
- using std::nothrow;
19
- using std::map;
20
- using std::vector;
15
+
16
+ #include <re2/re2.h>
17
+ #include <re2/set.h>
18
+ #include <ruby.h>
19
+ #include <ruby/encoding.h>
21
20
 
22
21
  #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
23
22
  #define UNUSED(x) ((void)x)
24
23
 
25
- #ifndef RSTRING_LEN
26
- #define RSTRING_LEN(x) (RSTRING(x)->len)
27
- #endif
28
-
29
- #ifndef RSTRING_PTR
30
- #define RSTRING_PTR(x) (RSTRING(x)->ptr)
31
- #endif
32
-
33
- #ifdef HAVE_RUBY_ENCODING_H
34
- #include <ruby/encoding.h>
35
- #define ENCODED_STR_NEW(str, length, encoding) \
36
- ({ \
37
- VALUE _string = rb_str_new(str, length); \
38
- int _enc = rb_enc_find_index(encoding); \
39
- rb_enc_associate_index(_string, _enc); \
40
- _string; \
41
- })
42
- #else
43
- #define ENCODED_STR_NEW(str, length, encoding) \
44
- rb_str_new((const char *)str, (long)length)
45
- #endif
46
-
47
- #ifdef HAVE_RB_STR_SUBLEN
48
- #define ENCODED_STR_SUBLEN(str, offset, encoding) \
49
- LONG2NUM(rb_str_sublen(str, offset))
50
- #else
51
- #ifdef HAVE_RUBY_ENCODING_H
52
- #define ENCODED_STR_SUBLEN(str, offset, encoding) \
53
- ({ \
54
- VALUE _string = ENCODED_STR_NEW(RSTRING_PTR(str), offset, encoding); \
55
- rb_str_length(_string); \
56
- })
57
- #else
58
- #define ENCODED_STR_SUBLEN(str, offset, encoding) \
59
- LONG2NUM(offset)
60
- #endif
61
- #endif
62
-
63
- #ifdef HAVE_ENDPOS_ARGUMENT
64
- #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
65
- (pattern->Match(text, startpos, endpos, anchor, match, nmatch))
66
- #else
67
- #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
68
- (pattern->Match(text, startpos, anchor, match, nmatch))
69
- #endif
70
-
71
24
  typedef struct {
72
25
  RE2 *pattern;
73
26
  } re2_pattern;
@@ -98,95 +51,103 @@ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
98
51
  id_perl_classes, id_word_boundary, id_one_line,
99
52
  id_unanchored, id_anchor_start, id_anchor_both, id_exception;
100
53
 
101
- void parse_re2_options(RE2::Options& re2_options, VALUE options) {
54
+ inline VALUE encoded_str_new(const char *str, long length, RE2::Options::Encoding encoding) {
55
+ if (encoding == RE2::Options::EncodingUTF8) {
56
+ return rb_utf8_str_new(str, length);
57
+ }
58
+
59
+ VALUE string = rb_str_new(str, length);
60
+ rb_enc_associate_index(string, rb_enc_find_index("ISO-8859-1"));
61
+
62
+ return string;
63
+ }
64
+
65
+ static void parse_re2_options(RE2::Options* re2_options, const VALUE options) {
102
66
  if (TYPE(options) != T_HASH) {
103
67
  rb_raise(rb_eArgError, "options should be a hash");
104
68
  }
105
- VALUE utf8, posix_syntax, longest_match, log_errors,
106
- max_mem, literal, never_nl, case_sensitive, perl_classes,
107
- word_boundary, one_line;
108
69
 
109
- utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
70
+ VALUE utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
110
71
  if (!NIL_P(utf8)) {
111
- re2_options.set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
72
+ re2_options->set_encoding(RTEST(utf8) ? RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1);
112
73
  }
113
74
 
114
- posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
75
+ VALUE posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
115
76
  if (!NIL_P(posix_syntax)) {
116
- re2_options.set_posix_syntax(RTEST(posix_syntax));
77
+ re2_options->set_posix_syntax(RTEST(posix_syntax));
117
78
  }
118
79
 
119
- longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
80
+ VALUE longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
120
81
  if (!NIL_P(longest_match)) {
121
- re2_options.set_longest_match(RTEST(longest_match));
82
+ re2_options->set_longest_match(RTEST(longest_match));
122
83
  }
123
84
 
124
- log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
85
+ VALUE log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
125
86
  if (!NIL_P(log_errors)) {
126
- re2_options.set_log_errors(RTEST(log_errors));
87
+ re2_options->set_log_errors(RTEST(log_errors));
127
88
  }
128
89
 
129
- max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
90
+ VALUE max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
130
91
  if (!NIL_P(max_mem)) {
131
- re2_options.set_max_mem(NUM2INT(max_mem));
92
+ re2_options->set_max_mem(NUM2INT(max_mem));
132
93
  }
133
94
 
134
- literal = rb_hash_aref(options, ID2SYM(id_literal));
95
+ VALUE literal = rb_hash_aref(options, ID2SYM(id_literal));
135
96
  if (!NIL_P(literal)) {
136
- re2_options.set_literal(RTEST(literal));
97
+ re2_options->set_literal(RTEST(literal));
137
98
  }
138
99
 
139
- never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
100
+ VALUE never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
140
101
  if (!NIL_P(never_nl)) {
141
- re2_options.set_never_nl(RTEST(never_nl));
102
+ re2_options->set_never_nl(RTEST(never_nl));
142
103
  }
143
104
 
144
- case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
105
+ VALUE case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
145
106
  if (!NIL_P(case_sensitive)) {
146
- re2_options.set_case_sensitive(RTEST(case_sensitive));
107
+ re2_options->set_case_sensitive(RTEST(case_sensitive));
147
108
  }
148
109
 
149
- perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
110
+ VALUE perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
150
111
  if (!NIL_P(perl_classes)) {
151
- re2_options.set_perl_classes(RTEST(perl_classes));
112
+ re2_options->set_perl_classes(RTEST(perl_classes));
152
113
  }
153
114
 
154
- word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
115
+ VALUE word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
155
116
  if (!NIL_P(word_boundary)) {
156
- re2_options.set_word_boundary(RTEST(word_boundary));
117
+ re2_options->set_word_boundary(RTEST(word_boundary));
157
118
  }
158
119
 
159
- one_line = rb_hash_aref(options, ID2SYM(id_one_line));
120
+ VALUE one_line = rb_hash_aref(options, ID2SYM(id_one_line));
160
121
  if (!NIL_P(one_line)) {
161
- re2_options.set_one_line(RTEST(one_line));
122
+ re2_options->set_one_line(RTEST(one_line));
162
123
  }
163
124
  }
164
125
 
165
- void re2_matchdata_mark(re2_matchdata* self) {
126
+ static void re2_matchdata_mark(re2_matchdata* self) {
166
127
  rb_gc_mark(self->regexp);
167
128
  rb_gc_mark(self->text);
168
129
  }
169
130
 
170
- void re2_matchdata_free(re2_matchdata* self) {
131
+ static void re2_matchdata_free(re2_matchdata* self) {
171
132
  if (self->matches) {
172
133
  delete[] self->matches;
173
134
  }
174
135
  free(self);
175
136
  }
176
137
 
177
- void re2_scanner_mark(re2_scanner* self) {
138
+ static void re2_scanner_mark(re2_scanner* self) {
178
139
  rb_gc_mark(self->regexp);
179
140
  rb_gc_mark(self->text);
180
141
  }
181
142
 
182
- void re2_scanner_free(re2_scanner* self) {
143
+ static void re2_scanner_free(re2_scanner* self) {
183
144
  if (self->input) {
184
145
  delete self->input;
185
146
  }
186
147
  free(self);
187
148
  }
188
149
 
189
- void re2_regexp_free(re2_pattern* self) {
150
+ static void re2_regexp_free(re2_pattern* self) {
190
151
  if (self->pattern) {
191
152
  delete self->pattern;
192
153
  }
@@ -195,12 +156,14 @@ void re2_regexp_free(re2_pattern* self) {
195
156
 
196
157
  static VALUE re2_matchdata_allocate(VALUE klass) {
197
158
  re2_matchdata *m;
159
+
198
160
  return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
199
161
  re2_matchdata_free, m);
200
162
  }
201
163
 
202
164
  static VALUE re2_scanner_allocate(VALUE klass) {
203
165
  re2_scanner *c;
166
+
204
167
  return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
205
168
  re2_scanner_free, c);
206
169
  }
@@ -213,7 +176,7 @@ static VALUE re2_scanner_allocate(VALUE klass) {
213
176
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
214
177
  * m.string #=> "bob 123"
215
178
  */
216
- static VALUE re2_matchdata_string(VALUE self) {
179
+ static VALUE re2_matchdata_string(const VALUE self) {
217
180
  re2_matchdata *m;
218
181
  Data_Get_Struct(self, re2_matchdata, m);
219
182
 
@@ -228,7 +191,7 @@ static VALUE re2_matchdata_string(VALUE self) {
228
191
  * c = RE2::Regexp.new('(\d+)').scan("foo")
229
192
  * c.string #=> "foo"
230
193
  */
231
- static VALUE re2_scanner_string(VALUE self) {
194
+ static VALUE re2_scanner_string(const VALUE self) {
232
195
  re2_scanner *c;
233
196
  Data_Get_Struct(self, re2_scanner, c);
234
197
 
@@ -243,7 +206,7 @@ static VALUE re2_scanner_string(VALUE self) {
243
206
  * c = RE2::Regexp.new('(\d+)').scan("foo")
244
207
  * c.eof? #=> true
245
208
  */
246
- static VALUE re2_scanner_eof(VALUE self) {
209
+ static VALUE re2_scanner_eof(const VALUE self) {
247
210
  re2_scanner *c;
248
211
  Data_Get_Struct(self, re2_scanner, c);
249
212
 
@@ -265,7 +228,7 @@ static VALUE re2_scanner_rewind(VALUE self) {
265
228
  re2_scanner *c;
266
229
  Data_Get_Struct(self, re2_scanner, c);
267
230
 
268
- c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text));
231
+ c->input = new(std::nothrow) re2::StringPiece(StringValuePtr(c->text));
269
232
  c->eof = false;
270
233
 
271
234
  return self;
@@ -286,44 +249,41 @@ static VALUE re2_scanner_rewind(VALUE self) {
286
249
  * s.scan #=> ["bar"]
287
250
  */
288
251
  static VALUE re2_scanner_scan(VALUE self) {
289
- int i;
290
- size_t original_input_size, new_input_size;
291
- bool input_advanced;
292
252
  re2_pattern *p;
293
253
  re2_scanner *c;
294
- VALUE result;
295
254
 
296
255
  Data_Get_Struct(self, re2_scanner, c);
297
256
  Data_Get_Struct(c->regexp, re2_pattern, p);
298
257
 
299
- vector<RE2::Arg> argv(c->number_of_capturing_groups);
300
- vector<RE2::Arg*> args(c->number_of_capturing_groups);
301
- vector<string> matches(c->number_of_capturing_groups);
258
+ std::vector<RE2::Arg> argv(c->number_of_capturing_groups);
259
+ std::vector<RE2::Arg*> args(c->number_of_capturing_groups);
260
+ std::vector<std::string> matches(c->number_of_capturing_groups);
302
261
 
303
262
  if (c->eof) {
304
263
  return Qnil;
305
264
  }
306
265
 
307
- original_input_size = c->input->size();
266
+ re2::StringPiece::size_type original_input_size = c->input->size();
308
267
 
309
- for (i = 0; i < c->number_of_capturing_groups; i++) {
268
+ for (int i = 0; i < c->number_of_capturing_groups; ++i) {
310
269
  argv[i] = &matches[i];
311
270
  args[i] = &argv[i];
312
271
  }
313
272
 
314
273
  if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0],
315
274
  c->number_of_capturing_groups)) {
316
- result = rb_ary_new2(c->number_of_capturing_groups);
317
- new_input_size = c->input->size();
318
- input_advanced = new_input_size < original_input_size;
275
+ re2::StringPiece::size_type new_input_size = c->input->size();
276
+ bool input_advanced = new_input_size < original_input_size;
277
+
278
+ VALUE result = rb_ary_new2(c->number_of_capturing_groups);
319
279
 
320
- for (i = 0; i < c->number_of_capturing_groups; i++) {
280
+ for (int i = 0; i < c->number_of_capturing_groups; ++i) {
321
281
  if (matches[i].empty()) {
322
282
  rb_ary_push(result, Qnil);
323
283
  } else {
324
- rb_ary_push(result, ENCODED_STR_NEW(matches[i].data(),
284
+ rb_ary_push(result, encoded_str_new(matches[i].data(),
325
285
  matches[i].size(),
326
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
286
+ p->pattern->options().encoding()));
327
287
  }
328
288
  }
329
289
 
@@ -334,47 +294,40 @@ static VALUE re2_scanner_scan(VALUE self) {
334
294
  if (!input_advanced && new_input_size > 0) {
335
295
  c->input->remove_prefix(1);
336
296
  }
297
+
298
+ return result;
337
299
  } else {
338
- result = Qnil;
300
+ return Qnil;
339
301
  }
340
-
341
- return result;
342
302
  }
343
303
 
344
304
  /*
345
305
  * Retrieve a matchdata by index or name.
346
306
  */
347
- re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
348
- int id;
307
+ static re2::StringPiece *re2_matchdata_find_match(VALUE idx, const VALUE self) {
349
308
  re2_matchdata *m;
350
309
  re2_pattern *p;
351
- map<string, int> groups;
352
- string name;
353
- re2::StringPiece *match;
354
310
 
355
311
  Data_Get_Struct(self, re2_matchdata, m);
356
312
  Data_Get_Struct(m->regexp, re2_pattern, p);
357
313
 
314
+ int id;
315
+
358
316
  if (FIXNUM_P(idx)) {
359
317
  id = FIX2INT(idx);
360
318
  } else {
361
- if (SYMBOL_P(idx)) {
362
- name = rb_id2name(SYM2ID(idx));
363
- } else {
364
- name = StringValuePtr(idx);
365
- }
319
+ const char *name = SYMBOL_P(idx) ? rb_id2name(SYM2ID(idx)) : StringValuePtr(idx);
320
+ const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
366
321
 
367
- groups = p->pattern->NamedCapturingGroups();
368
-
369
- if (groups.count(name) == 1) {
370
- id = groups[name];
322
+ if (std::map<std::string, int>::const_iterator search = groups.find(name); search != groups.end()) {
323
+ id = search->second;
371
324
  } else {
372
325
  return NULL;
373
326
  }
374
327
  }
375
328
 
376
329
  if (id >= 0 && id < m->number_of_matches) {
377
- match = &m->matches[id];
330
+ re2::StringPiece *match = &m->matches[id];
378
331
 
379
332
  if (!match->empty()) {
380
333
  return match;
@@ -393,7 +346,7 @@ re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
393
346
  * m.size #=> 2
394
347
  * m.length #=> 2
395
348
  */
396
- static VALUE re2_matchdata_size(VALUE self) {
349
+ static VALUE re2_matchdata_size(const VALUE self) {
397
350
  re2_matchdata *m;
398
351
  Data_Get_Struct(self, re2_matchdata, m);
399
352
 
@@ -410,23 +363,18 @@ static VALUE re2_matchdata_size(VALUE self) {
410
363
  * m.begin(0) #=> 1
411
364
  * m.begin(1) #=> 4
412
365
  */
413
- static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
366
+ static VALUE re2_matchdata_begin(const VALUE self, VALUE n) {
414
367
  re2_matchdata *m;
415
- re2_pattern *p;
416
- re2::StringPiece *match;
417
- long offset;
418
368
 
419
369
  Data_Get_Struct(self, re2_matchdata, m);
420
- Data_Get_Struct(m->regexp, re2_pattern, p);
421
370
 
422
- match = re2_matchdata_find_match(n, self);
371
+ re2::StringPiece *match = re2_matchdata_find_match(n, self);
423
372
  if (match == NULL) {
424
373
  return Qnil;
425
374
  } else {
426
- offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text));
375
+ long offset = match->data() - StringValuePtr(m->text);
427
376
 
428
- return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
429
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
377
+ return LONG2NUM(rb_str_sublen(StringValue(m->text), offset));
430
378
  }
431
379
  }
432
380
 
@@ -440,24 +388,18 @@ static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
440
388
  * m.end(0) #=> 9
441
389
  * m.end(1) #=> 7
442
390
  */
443
- static VALUE re2_matchdata_end(VALUE self, VALUE n) {
391
+ static VALUE re2_matchdata_end(const VALUE self, VALUE n) {
444
392
  re2_matchdata *m;
445
- re2_pattern *p;
446
- re2::StringPiece *match;
447
- long offset;
448
393
 
449
394
  Data_Get_Struct(self, re2_matchdata, m);
450
- Data_Get_Struct(m->regexp, re2_pattern, p);
451
-
452
- match = re2_matchdata_find_match(n, self);
453
395
 
396
+ re2::StringPiece *match = re2_matchdata_find_match(n, self);
454
397
  if (match == NULL) {
455
398
  return Qnil;
456
399
  } else {
457
- offset = reinterpret_cast<uintptr_t>(match->data()) - reinterpret_cast<uintptr_t>(StringValuePtr(m->text)) + match->size();
400
+ long offset = (match->data() - StringValuePtr(m->text)) + match->size();
458
401
 
459
- return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
460
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
402
+ return LONG2NUM(rb_str_sublen(StringValue(m->text), offset));
461
403
  }
462
404
  }
463
405
 
@@ -469,9 +411,10 @@ static VALUE re2_matchdata_end(VALUE self, VALUE n) {
469
411
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
470
412
  * m.regexp #=> #<RE2::Regexp /(\d+)/>
471
413
  */
472
- static VALUE re2_matchdata_regexp(VALUE self) {
414
+ static VALUE re2_matchdata_regexp(const VALUE self) {
473
415
  re2_matchdata *m;
474
416
  Data_Get_Struct(self, re2_matchdata, m);
417
+
475
418
  return m->regexp;
476
419
  }
477
420
 
@@ -483,7 +426,7 @@ static VALUE re2_matchdata_regexp(VALUE self) {
483
426
  * c = RE2::Regexp.new('(\d+)').scan("bob 123")
484
427
  * c.regexp #=> #<RE2::Regexp /(\d+)/>
485
428
  */
486
- static VALUE re2_scanner_regexp(VALUE self) {
429
+ static VALUE re2_scanner_regexp(const VALUE self) {
487
430
  re2_scanner *c;
488
431
  Data_Get_Struct(self, re2_scanner, c);
489
432
 
@@ -492,6 +435,7 @@ static VALUE re2_scanner_regexp(VALUE self) {
492
435
 
493
436
  static VALUE re2_regexp_allocate(VALUE klass) {
494
437
  re2_pattern *p;
438
+
495
439
  return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p);
496
440
  }
497
441
 
@@ -507,35 +451,31 @@ static VALUE re2_regexp_allocate(VALUE klass) {
507
451
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
508
452
  * m.to_a #=> ["123", "123"]
509
453
  */
510
- static VALUE re2_matchdata_to_a(VALUE self) {
511
- int i;
454
+ static VALUE re2_matchdata_to_a(const VALUE self) {
512
455
  re2_matchdata *m;
513
456
  re2_pattern *p;
514
- re2::StringPiece *match;
515
- VALUE array;
516
457
 
517
458
  Data_Get_Struct(self, re2_matchdata, m);
518
459
  Data_Get_Struct(m->regexp, re2_pattern, p);
519
460
 
520
- array = rb_ary_new2(m->number_of_matches);
521
- for (i = 0; i < m->number_of_matches; i++) {
522
- match = &m->matches[i];
461
+ VALUE array = rb_ary_new2(m->number_of_matches);
462
+ for (int i = 0; i < m->number_of_matches; ++i) {
463
+ re2::StringPiece *match = &m->matches[i];
523
464
 
524
465
  if (match->empty()) {
525
466
  rb_ary_push(array, Qnil);
526
467
  } else {
527
- rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
528
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
468
+ rb_ary_push(array, encoded_str_new(match->data(), match->size(),
469
+ p->pattern->options().encoding()));
529
470
  }
530
471
  }
531
472
 
532
473
  return array;
533
474
  }
534
475
 
535
- static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
476
+ static VALUE re2_matchdata_nth_match(int nth, const VALUE self) {
536
477
  re2_matchdata *m;
537
478
  re2_pattern *p;
538
- re2::StringPiece *match;
539
479
 
540
480
  Data_Get_Struct(self, re2_matchdata, m);
541
481
  Data_Get_Struct(m->regexp, re2_pattern, p);
@@ -543,32 +483,28 @@ static VALUE re2_matchdata_nth_match(int nth, VALUE self) {
543
483
  if (nth < 0 || nth >= m->number_of_matches) {
544
484
  return Qnil;
545
485
  } else {
546
- match = &m->matches[nth];
486
+ re2::StringPiece *match = &m->matches[nth];
547
487
 
548
488
  if (match->empty()) {
549
489
  return Qnil;
550
490
  } else {
551
- return ENCODED_STR_NEW(match->data(), match->size(),
552
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
491
+ return encoded_str_new(match->data(), match->size(),
492
+ p->pattern->options().encoding());
553
493
  }
554
494
  }
555
495
  }
556
496
 
557
- static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
558
- int idx;
497
+ static VALUE re2_matchdata_named_match(const char* name, const VALUE self) {
559
498
  re2_matchdata *m;
560
499
  re2_pattern *p;
561
- map<string, int> groups;
562
- string name_as_string(name);
563
500
 
564
501
  Data_Get_Struct(self, re2_matchdata, m);
565
502
  Data_Get_Struct(m->regexp, re2_pattern, p);
566
503
 
567
- groups = p->pattern->NamedCapturingGroups();
504
+ const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
568
505
 
569
- if (groups.count(name_as_string) == 1) {
570
- idx = groups[name_as_string];
571
- return re2_matchdata_nth_match(idx, self);
506
+ if (std::map<std::string, int>::const_iterator search = groups.find(name); search != groups.end()) {
507
+ return re2_matchdata_nth_match(search->second, self);
572
508
  } else {
573
509
  return Qnil;
574
510
  }
@@ -621,7 +557,7 @@ static VALUE re2_matchdata_named_match(const char* name, VALUE self) {
621
557
  * m["number"] #=> "123"
622
558
  * m[:number] #=> "123"
623
559
  */
624
- static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
560
+ static VALUE re2_matchdata_aref(int argc, VALUE *argv, const VALUE self) {
625
561
  VALUE idx, rest;
626
562
  rb_scan_args(argc, argv, "11", &idx, &rest);
627
563
 
@@ -641,7 +577,7 @@ static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) {
641
577
  *
642
578
  * @return [String] the entire matched string
643
579
  */
644
- static VALUE re2_matchdata_to_s(VALUE self) {
580
+ static VALUE re2_matchdata_to_s(const VALUE self) {
645
581
  return re2_matchdata_nth_match(0, self);
646
582
  }
647
583
 
@@ -657,26 +593,24 @@ static VALUE re2_matchdata_to_s(VALUE self) {
657
593
  * m = RE2::Regexp.new('(\d+)').match("bob 123")
658
594
  * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">"
659
595
  */
660
- static VALUE re2_matchdata_inspect(VALUE self) {
661
- int i;
596
+ static VALUE re2_matchdata_inspect(const VALUE self) {
662
597
  re2_matchdata *m;
663
598
  re2_pattern *p;
664
- VALUE match, result;
665
- ostringstream output;
666
599
 
667
600
  Data_Get_Struct(self, re2_matchdata, m);
668
601
  Data_Get_Struct(m->regexp, re2_pattern, p);
669
602
 
603
+ std::ostringstream output;
670
604
  output << "#<RE2::MatchData";
671
605
 
672
- for (i = 0; i < m->number_of_matches; i++) {
606
+ for (int i = 0; i < m->number_of_matches; ++i) {
673
607
  output << " ";
674
608
 
675
609
  if (i > 0) {
676
610
  output << i << ":";
677
611
  }
678
612
 
679
- match = re2_matchdata_nth_match(i, self);
613
+ VALUE match = re2_matchdata_nth_match(i, self);
680
614
 
681
615
  if (match == Qnil) {
682
616
  output << "nil";
@@ -687,10 +621,8 @@ static VALUE re2_matchdata_inspect(VALUE self) {
687
621
 
688
622
  output << ">";
689
623
 
690
- result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
691
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
692
-
693
- return result;
624
+ return encoded_str_new(output.str().data(), output.str().length(),
625
+ p->pattern->options().encoding());
694
626
  }
695
627
 
696
628
  /*
@@ -713,25 +645,22 @@ static VALUE re2_matchdata_inspect(VALUE self) {
713
645
  * puts "Unrecognised match"
714
646
  * end
715
647
  */
716
- static VALUE re2_matchdata_deconstruct(VALUE self) {
717
- int i;
648
+ static VALUE re2_matchdata_deconstruct(const VALUE self) {
718
649
  re2_matchdata *m;
719
650
  re2_pattern *p;
720
- re2::StringPiece *match;
721
- VALUE array;
722
651
 
723
652
  Data_Get_Struct(self, re2_matchdata, m);
724
653
  Data_Get_Struct(m->regexp, re2_pattern, p);
725
654
 
726
- array = rb_ary_new2(m->number_of_matches - 1);
727
- for (i = 1; i < m->number_of_matches; i++) {
728
- match = &m->matches[i];
655
+ VALUE array = rb_ary_new2(m->number_of_matches - 1);
656
+ for (int i = 1; i < m->number_of_matches; ++i) {
657
+ re2::StringPiece *match = &m->matches[i];
729
658
 
730
659
  if (match->empty()) {
731
660
  rb_ary_push(array, Qnil);
732
661
  } else {
733
- rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(),
734
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"));
662
+ rb_ary_push(array, encoded_str_new(match->data(), match->size(),
663
+ p->pattern->options().encoding()));
735
664
  }
736
665
  }
737
666
 
@@ -766,40 +695,36 @@ static VALUE re2_matchdata_deconstruct(VALUE self) {
766
695
  * puts "Unrecognised match"
767
696
  * end
768
697
  */
769
- static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
770
- int i;
771
- VALUE capturing_groups, key;
698
+ static VALUE re2_matchdata_deconstruct_keys(const VALUE self, const VALUE keys) {
772
699
  re2_matchdata *m;
773
700
  re2_pattern *p;
774
- map<string, int> groups;
775
- map<string, int>::iterator iterator;
776
701
 
777
702
  Data_Get_Struct(self, re2_matchdata, m);
778
703
  Data_Get_Struct(m->regexp, re2_pattern, p);
779
704
 
780
- groups = p->pattern->NamedCapturingGroups();
781
- capturing_groups = rb_hash_new();
705
+ const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
706
+ VALUE capturing_groups = rb_hash_new();
782
707
 
783
708
  if (NIL_P(keys)) {
784
- for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
709
+ for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
785
710
  rb_hash_aset(capturing_groups,
786
- ID2SYM(rb_intern(iterator->first.data())),
787
- re2_matchdata_nth_match(iterator->second, self));
711
+ ID2SYM(rb_intern(it->first.data())),
712
+ re2_matchdata_nth_match(it->second, self));
788
713
  }
789
714
  } else {
790
715
  Check_Type(keys, T_ARRAY);
791
716
 
792
717
  if (p->pattern->NumberOfCapturingGroups() >= RARRAY_LEN(keys)) {
793
- for (i = 0; i < RARRAY_LEN(keys); i++) {
794
- key = rb_ary_entry(keys, i);
718
+ for (int i = 0; i < RARRAY_LEN(keys); ++i) {
719
+ VALUE key = rb_ary_entry(keys, i);
795
720
  Check_Type(key, T_SYMBOL);
796
- string name(rb_id2name(SYM2ID(key)));
721
+ const char *name = rb_id2name(SYM2ID(key));
797
722
 
798
- if (groups.count(name) == 0) {
723
+ if (std::map<std::string, int>::const_iterator search = groups.find(name); search != groups.end()) {
724
+ rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(search->second, self));
725
+ } else {
799
726
  break;
800
727
  }
801
-
802
- rb_hash_aset(capturing_groups, key, re2_matchdata_nth_match(groups[name], self));
803
728
  }
804
729
  }
805
730
  }
@@ -816,6 +741,7 @@ static VALUE re2_matchdata_deconstruct_keys(VALUE self, VALUE keys) {
816
741
  */
817
742
  static VALUE re2_re2(int argc, VALUE *argv, VALUE self) {
818
743
  UNUSED(self);
744
+
819
745
  return rb_class_new_instance(argc, argv, re2_cRegexp);
820
746
  }
821
747
 
@@ -863,11 +789,11 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
863
789
 
864
790
  if (RTEST(options)) {
865
791
  RE2::Options re2_options;
866
- parse_re2_options(re2_options, options);
792
+ parse_re2_options(&re2_options, options);
867
793
 
868
- p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options);
794
+ p->pattern = new(std::nothrow) RE2(StringValuePtr(pattern), re2_options);
869
795
  } else {
870
- p->pattern = new(nothrow) RE2(StringValuePtr(pattern));
796
+ p->pattern = new(std::nothrow) RE2(StringValuePtr(pattern));
871
797
  }
872
798
 
873
799
  if (p->pattern == 0) {
@@ -889,19 +815,17 @@ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
889
815
  * re2 = RE2::Regexp.new("woo?")
890
816
  * re2.inspect #=> "#<RE2::Regexp /woo?/>"
891
817
  */
892
- static VALUE re2_regexp_inspect(VALUE self) {
818
+ static VALUE re2_regexp_inspect(const VALUE self) {
893
819
  re2_pattern *p;
894
- VALUE result;
895
- ostringstream output;
896
820
 
897
821
  Data_Get_Struct(self, re2_pattern, p);
898
822
 
899
- output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
823
+ std::ostringstream output;
900
824
 
901
- result = ENCODED_STR_NEW(output.str().data(), output.str().length(),
902
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
825
+ output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>";
903
826
 
904
- return result;
827
+ return encoded_str_new(output.str().data(), output.str().length(),
828
+ p->pattern->options().encoding());
905
829
  }
906
830
 
907
831
  /*
@@ -916,12 +840,13 @@ static VALUE re2_regexp_inspect(VALUE self) {
916
840
  * re2 = RE2::Regexp.new("woo?")
917
841
  * re2.to_s #=> "woo?"
918
842
  */
919
- static VALUE re2_regexp_to_s(VALUE self) {
843
+ static VALUE re2_regexp_to_s(const VALUE self) {
920
844
  re2_pattern *p;
921
845
  Data_Get_Struct(self, re2_pattern, p);
922
- return ENCODED_STR_NEW(p->pattern->pattern().data(),
846
+
847
+ return encoded_str_new(p->pattern->pattern().data(),
923
848
  p->pattern->pattern().size(),
924
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
849
+ p->pattern->options().encoding());
925
850
  }
926
851
 
927
852
  /*
@@ -933,9 +858,10 @@ static VALUE re2_regexp_to_s(VALUE self) {
933
858
  * re2 = RE2::Regexp.new("woo?")
934
859
  * re2.ok? #=> true
935
860
  */
936
- static VALUE re2_regexp_ok(VALUE self) {
861
+ static VALUE re2_regexp_ok(const VALUE self) {
937
862
  re2_pattern *p;
938
863
  Data_Get_Struct(self, re2_pattern, p);
864
+
939
865
  return BOOL2RUBY(p->pattern->ok());
940
866
  }
941
867
 
@@ -948,9 +874,10 @@ static VALUE re2_regexp_ok(VALUE self) {
948
874
  * re2 = RE2::Regexp.new("woo?", :utf8 => true)
949
875
  * re2.utf8? #=> true
950
876
  */
951
- static VALUE re2_regexp_utf8(VALUE self) {
877
+ static VALUE re2_regexp_utf8(const VALUE self) {
952
878
  re2_pattern *p;
953
879
  Data_Get_Struct(self, re2_pattern, p);
880
+
954
881
  return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8);
955
882
  }
956
883
 
@@ -963,9 +890,10 @@ static VALUE re2_regexp_utf8(VALUE self) {
963
890
  * re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
964
891
  * re2.posix_syntax? #=> true
965
892
  */
966
- static VALUE re2_regexp_posix_syntax(VALUE self) {
893
+ static VALUE re2_regexp_posix_syntax(const VALUE self) {
967
894
  re2_pattern *p;
968
895
  Data_Get_Struct(self, re2_pattern, p);
896
+
969
897
  return BOOL2RUBY(p->pattern->options().posix_syntax());
970
898
  }
971
899
 
@@ -978,9 +906,10 @@ static VALUE re2_regexp_posix_syntax(VALUE self) {
978
906
  * re2 = RE2::Regexp.new("woo?", :longest_match => true)
979
907
  * re2.longest_match? #=> true
980
908
  */
981
- static VALUE re2_regexp_longest_match(VALUE self) {
909
+ static VALUE re2_regexp_longest_match(const VALUE self) {
982
910
  re2_pattern *p;
983
911
  Data_Get_Struct(self, re2_pattern, p);
912
+
984
913
  return BOOL2RUBY(p->pattern->options().longest_match());
985
914
  }
986
915
 
@@ -993,9 +922,10 @@ static VALUE re2_regexp_longest_match(VALUE self) {
993
922
  * re2 = RE2::Regexp.new("woo?", :log_errors => true)
994
923
  * re2.log_errors? #=> true
995
924
  */
996
- static VALUE re2_regexp_log_errors(VALUE self) {
925
+ static VALUE re2_regexp_log_errors(const VALUE self) {
997
926
  re2_pattern *p;
998
927
  Data_Get_Struct(self, re2_pattern, p);
928
+
999
929
  return BOOL2RUBY(p->pattern->options().log_errors());
1000
930
  }
1001
931
 
@@ -1008,9 +938,10 @@ static VALUE re2_regexp_log_errors(VALUE self) {
1008
938
  * re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
1009
939
  * re2.max_mem #=> 1024
1010
940
  */
1011
- static VALUE re2_regexp_max_mem(VALUE self) {
941
+ static VALUE re2_regexp_max_mem(const VALUE self) {
1012
942
  re2_pattern *p;
1013
943
  Data_Get_Struct(self, re2_pattern, p);
944
+
1014
945
  return INT2FIX(p->pattern->options().max_mem());
1015
946
  }
1016
947
 
@@ -1023,9 +954,10 @@ static VALUE re2_regexp_max_mem(VALUE self) {
1023
954
  * re2 = RE2::Regexp.new("woo?", :literal => true)
1024
955
  * re2.literal? #=> true
1025
956
  */
1026
- static VALUE re2_regexp_literal(VALUE self) {
957
+ static VALUE re2_regexp_literal(const VALUE self) {
1027
958
  re2_pattern *p;
1028
959
  Data_Get_Struct(self, re2_pattern, p);
960
+
1029
961
  return BOOL2RUBY(p->pattern->options().literal());
1030
962
  }
1031
963
 
@@ -1038,9 +970,10 @@ static VALUE re2_regexp_literal(VALUE self) {
1038
970
  * re2 = RE2::Regexp.new("woo?", :never_nl => true)
1039
971
  * re2.never_nl? #=> true
1040
972
  */
1041
- static VALUE re2_regexp_never_nl(VALUE self) {
973
+ static VALUE re2_regexp_never_nl(const VALUE self) {
1042
974
  re2_pattern *p;
1043
975
  Data_Get_Struct(self, re2_pattern, p);
976
+
1044
977
  return BOOL2RUBY(p->pattern->options().never_nl());
1045
978
  }
1046
979
 
@@ -1053,9 +986,10 @@ static VALUE re2_regexp_never_nl(VALUE self) {
1053
986
  * re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
1054
987
  * re2.case_sensitive? #=> true
1055
988
  */
1056
- static VALUE re2_regexp_case_sensitive(VALUE self) {
989
+ static VALUE re2_regexp_case_sensitive(const VALUE self) {
1057
990
  re2_pattern *p;
1058
991
  Data_Get_Struct(self, re2_pattern, p);
992
+
1059
993
  return BOOL2RUBY(p->pattern->options().case_sensitive());
1060
994
  }
1061
995
 
@@ -1069,7 +1003,7 @@ static VALUE re2_regexp_case_sensitive(VALUE self) {
1069
1003
  * re2.case_insensitive? #=> false
1070
1004
  * re2.casefold? #=> false
1071
1005
  */
1072
- static VALUE re2_regexp_case_insensitive(VALUE self) {
1006
+ static VALUE re2_regexp_case_insensitive(const VALUE self) {
1073
1007
  return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue);
1074
1008
  }
1075
1009
 
@@ -1082,9 +1016,10 @@ static VALUE re2_regexp_case_insensitive(VALUE self) {
1082
1016
  * re2 = RE2::Regexp.new("woo?", :perl_classes => true)
1083
1017
  * re2.perl_classes? #=> true
1084
1018
  */
1085
- static VALUE re2_regexp_perl_classes(VALUE self) {
1019
+ static VALUE re2_regexp_perl_classes(const VALUE self) {
1086
1020
  re2_pattern *p;
1087
1021
  Data_Get_Struct(self, re2_pattern, p);
1022
+
1088
1023
  return BOOL2RUBY(p->pattern->options().perl_classes());
1089
1024
  }
1090
1025
 
@@ -1097,9 +1032,10 @@ static VALUE re2_regexp_perl_classes(VALUE self) {
1097
1032
  * re2 = RE2::Regexp.new("woo?", :word_boundary => true)
1098
1033
  * re2.word_boundary? #=> true
1099
1034
  */
1100
- static VALUE re2_regexp_word_boundary(VALUE self) {
1035
+ static VALUE re2_regexp_word_boundary(const VALUE self) {
1101
1036
  re2_pattern *p;
1102
1037
  Data_Get_Struct(self, re2_pattern, p);
1038
+
1103
1039
  return BOOL2RUBY(p->pattern->options().word_boundary());
1104
1040
  }
1105
1041
 
@@ -1112,9 +1048,10 @@ static VALUE re2_regexp_word_boundary(VALUE self) {
1112
1048
  * re2 = RE2::Regexp.new("woo?", :one_line => true)
1113
1049
  * re2.one_line? #=> true
1114
1050
  */
1115
- static VALUE re2_regexp_one_line(VALUE self) {
1051
+ static VALUE re2_regexp_one_line(const VALUE self) {
1116
1052
  re2_pattern *p;
1117
1053
  Data_Get_Struct(self, re2_pattern, p);
1054
+
1118
1055
  return BOOL2RUBY(p->pattern->options().one_line());
1119
1056
  }
1120
1057
 
@@ -1124,9 +1061,10 @@ static VALUE re2_regexp_one_line(VALUE self) {
1124
1061
  *
1125
1062
  * @return [String, nil] the error string or nil
1126
1063
  */
1127
- static VALUE re2_regexp_error(VALUE self) {
1064
+ static VALUE re2_regexp_error(const VALUE self) {
1128
1065
  re2_pattern *p;
1129
1066
  Data_Get_Struct(self, re2_pattern, p);
1067
+
1130
1068
  if (p->pattern->ok()) {
1131
1069
  return Qnil;
1132
1070
  } else {
@@ -1144,15 +1082,16 @@ static VALUE re2_regexp_error(VALUE self) {
1144
1082
  *
1145
1083
  * @return [String, nil] the offending portion of the regexp or nil
1146
1084
  */
1147
- static VALUE re2_regexp_error_arg(VALUE self) {
1085
+ static VALUE re2_regexp_error_arg(const VALUE self) {
1148
1086
  re2_pattern *p;
1149
1087
  Data_Get_Struct(self, re2_pattern, p);
1088
+
1150
1089
  if (p->pattern->ok()) {
1151
1090
  return Qnil;
1152
1091
  } else {
1153
- return ENCODED_STR_NEW(p->pattern->error_arg().data(),
1092
+ return encoded_str_new(p->pattern->error_arg().data(),
1154
1093
  p->pattern->error_arg().size(),
1155
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1094
+ p->pattern->options().encoding());
1156
1095
  }
1157
1096
  }
1158
1097
 
@@ -1163,9 +1102,10 @@ static VALUE re2_regexp_error_arg(VALUE self) {
1163
1102
  *
1164
1103
  * @return [Integer] the regexp "cost"
1165
1104
  */
1166
- static VALUE re2_regexp_program_size(VALUE self) {
1105
+ static VALUE re2_regexp_program_size(const VALUE self) {
1167
1106
  re2_pattern *p;
1168
1107
  Data_Get_Struct(self, re2_pattern, p);
1108
+
1169
1109
  return INT2FIX(p->pattern->ProgramSize());
1170
1110
  }
1171
1111
 
@@ -1175,12 +1115,11 @@ static VALUE re2_regexp_program_size(VALUE self) {
1175
1115
  *
1176
1116
  * @return [Hash] the options
1177
1117
  */
1178
- static VALUE re2_regexp_options(VALUE self) {
1179
- VALUE options;
1118
+ static VALUE re2_regexp_options(const VALUE self) {
1180
1119
  re2_pattern *p;
1181
1120
 
1182
1121
  Data_Get_Struct(self, re2_pattern, p);
1183
- options = rb_hash_new();
1122
+ VALUE options = rb_hash_new();
1184
1123
 
1185
1124
  rb_hash_aset(options, ID2SYM(id_utf8),
1186
1125
  BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8));
@@ -1228,10 +1167,10 @@ static VALUE re2_regexp_options(VALUE self) {
1228
1167
  *
1229
1168
  * @return [Integer] the number of capturing subpatterns
1230
1169
  */
1231
- static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
1170
+ static VALUE re2_regexp_number_of_capturing_groups(const VALUE self) {
1232
1171
  re2_pattern *p;
1233
-
1234
1172
  Data_Get_Struct(self, re2_pattern, p);
1173
+
1235
1174
  return INT2FIX(p->pattern->NumberOfCapturingGroups());
1236
1175
  }
1237
1176
 
@@ -1244,21 +1183,18 @@ static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
1244
1183
  *
1245
1184
  * @return [Hash] a hash of names to capturing indices
1246
1185
  */
1247
- static VALUE re2_regexp_named_capturing_groups(VALUE self) {
1248
- VALUE capturing_groups;
1186
+ static VALUE re2_regexp_named_capturing_groups(const VALUE self) {
1249
1187
  re2_pattern *p;
1250
- map<string, int> groups;
1251
- map<string, int>::iterator iterator;
1252
1188
 
1253
1189
  Data_Get_Struct(self, re2_pattern, p);
1254
- groups = p->pattern->NamedCapturingGroups();
1255
- capturing_groups = rb_hash_new();
1190
+ const std::map<std::string, int>& groups = p->pattern->NamedCapturingGroups();
1191
+ VALUE capturing_groups = rb_hash_new();
1256
1192
 
1257
- for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
1193
+ for (std::map<std::string, int>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
1258
1194
  rb_hash_aset(capturing_groups,
1259
- ENCODED_STR_NEW(iterator->first.data(), iterator->first.size(),
1260
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"),
1261
- INT2FIX(iterator->second));
1195
+ encoded_str_new(it->first.data(), it->first.size(),
1196
+ p->pattern->options().encoding()),
1197
+ INT2FIX(it->second));
1262
1198
  }
1263
1199
 
1264
1200
  return capturing_groups;
@@ -1316,20 +1252,20 @@ static VALUE re2_regexp_named_capturing_groups(VALUE self) {
1316
1252
  * r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
1317
1253
  * r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
1318
1254
  */
1319
- static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1320
- int n;
1321
- bool matched;
1255
+ static VALUE re2_regexp_match(int argc, VALUE *argv, const VALUE self) {
1322
1256
  re2_pattern *p;
1323
1257
  re2_matchdata *m;
1324
- VALUE text, number_of_matches, matchdata;
1258
+ VALUE text, number_of_matches;
1325
1259
 
1326
1260
  rb_scan_args(argc, argv, "11", &text, &number_of_matches);
1327
1261
 
1328
1262
  /* Ensure text is a string. */
1329
- text = StringValue(text);
1263
+ StringValue(text);
1330
1264
 
1331
1265
  Data_Get_Struct(self, re2_pattern, p);
1332
1266
 
1267
+ int n;
1268
+
1333
1269
  if (RTEST(number_of_matches)) {
1334
1270
  n = NUM2INT(number_of_matches);
1335
1271
 
@@ -1345,17 +1281,21 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1345
1281
  }
1346
1282
 
1347
1283
  if (n == 0) {
1348
- matched = match(p->pattern, StringValuePtr(text), 0,
1349
- static_cast<int>(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0);
1284
+ #ifdef HAVE_ENDPOS_ARGUMENT
1285
+ bool matched = p->pattern->Match(StringValuePtr(text), 0,
1286
+ RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
1287
+ #else
1288
+ bool matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED,
1289
+ 0, 0);
1290
+ #endif
1350
1291
  return BOOL2RUBY(matched);
1351
1292
  } else {
1352
-
1353
1293
  /* Because match returns the whole match as well. */
1354
1294
  n += 1;
1355
1295
 
1356
- matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1296
+ VALUE matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
1357
1297
  Data_Get_Struct(matchdata, re2_matchdata, m);
1358
- m->matches = new(nothrow) re2::StringPiece[n];
1298
+ m->matches = new(std::nothrow) re2::StringPiece[n];
1359
1299
  m->regexp = self;
1360
1300
  m->text = rb_str_dup(text);
1361
1301
  rb_str_freeze(m->text);
@@ -1367,10 +1307,13 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1367
1307
 
1368
1308
  m->number_of_matches = n;
1369
1309
 
1370
- matched = match(p->pattern, StringValuePtr(m->text), 0,
1371
- static_cast<int>(RSTRING_LEN(m->text)),
1372
- RE2::UNANCHORED, m->matches, n);
1373
-
1310
+ #ifdef HAVE_ENDPOS_ARGUMENT
1311
+ bool matched = p->pattern->Match(StringValuePtr(m->text), 0,
1312
+ RSTRING_LEN(m->text), RE2::UNANCHORED, m->matches, n);
1313
+ #else
1314
+ bool matched = p->pattern->Match(StringValuePtr(m->text), 0,
1315
+ RE2::UNANCHORED, m->matches, n);
1316
+ #endif
1374
1317
  if (matched) {
1375
1318
  return matchdata;
1376
1319
  } else {
@@ -1385,10 +1328,8 @@ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
1385
1328
  *
1386
1329
  * @return [Boolean] whether the match was successful
1387
1330
  */
1388
- static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
1389
- VALUE argv[2];
1390
- argv[0] = text;
1391
- argv[1] = INT2FIX(0);
1331
+ static VALUE re2_regexp_match_p(const VALUE self, VALUE text) {
1332
+ VALUE argv[2] = { text, INT2FIX(0) };
1392
1333
 
1393
1334
  return re2_regexp_match(2, argv, self);
1394
1335
  }
@@ -1399,16 +1340,15 @@ static VALUE re2_regexp_match_p(VALUE self, VALUE text) {
1399
1340
  * @example
1400
1341
  * c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
1401
1342
  */
1402
- static VALUE re2_regexp_scan(VALUE self, VALUE text) {
1343
+ static VALUE re2_regexp_scan(const VALUE self, VALUE text) {
1403
1344
  re2_pattern *p;
1404
1345
  re2_scanner *c;
1405
- VALUE scanner;
1406
1346
 
1407
1347
  Data_Get_Struct(self, re2_pattern, p);
1408
- scanner = rb_class_new_instance(0, 0, re2_cScanner);
1348
+ VALUE scanner = rb_class_new_instance(0, 0, re2_cScanner);
1409
1349
  Data_Get_Struct(scanner, re2_scanner, c);
1410
1350
 
1411
- c->input = new(nothrow) re2::StringPiece(StringValuePtr(text));
1351
+ c->input = new(std::nothrow) re2::StringPiece(StringValuePtr(text));
1412
1352
  c->regexp = self;
1413
1353
  c->text = text;
1414
1354
 
@@ -1448,23 +1388,21 @@ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
1448
1388
  /* Take a copy of str so it can be modified in-place by
1449
1389
  * RE2::Replace.
1450
1390
  */
1451
- string str_as_string(StringValuePtr(str));
1391
+ std::string str_as_string(StringValuePtr(str));
1452
1392
 
1453
1393
  /* Do the replacement. */
1454
1394
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1455
1395
  Data_Get_Struct(pattern, re2_pattern, p);
1456
1396
  RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1457
1397
 
1458
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1459
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1398
+ return encoded_str_new(str_as_string.data(), str_as_string.size(),
1399
+ p->pattern->options().encoding());
1460
1400
  } else {
1461
1401
  RE2::Replace(&str_as_string, StringValuePtr(pattern),
1462
1402
  StringValuePtr(rewrite));
1463
1403
 
1464
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1465
- "UTF-8");
1404
+ return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
1466
1405
  }
1467
-
1468
1406
  }
1469
1407
 
1470
1408
  /*
@@ -1491,21 +1429,20 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
1491
1429
  * RE2::GlobalReplace.
1492
1430
  */
1493
1431
  re2_pattern *p;
1494
- string str_as_string(StringValuePtr(str));
1432
+ std::string str_as_string(StringValuePtr(str));
1495
1433
 
1496
1434
  /* Do the replacement. */
1497
1435
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1498
1436
  Data_Get_Struct(pattern, re2_pattern, p);
1499
1437
  RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1500
1438
 
1501
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1502
- p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1");
1439
+ return encoded_str_new(str_as_string.data(), str_as_string.size(),
1440
+ p->pattern->options().encoding());
1503
1441
  } else {
1504
1442
  RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
1505
1443
  StringValuePtr(rewrite));
1506
1444
 
1507
- return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1508
- "UTF-8");
1445
+ return encoded_str_new(str_as_string.data(), str_as_string.size(), RE2::Options::EncodingUTF8);
1509
1446
  }
1510
1447
  }
1511
1448
 
@@ -1521,11 +1458,12 @@ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
1521
1458
  */
1522
1459
  static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
1523
1460
  UNUSED(self);
1524
- string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
1461
+ std::string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
1462
+
1525
1463
  return rb_str_new(quoted_string.data(), quoted_string.size());
1526
1464
  }
1527
1465
 
1528
- void re2_set_free(re2_set *self) {
1466
+ static void re2_set_free(re2_set *self) {
1529
1467
  if (self->set) {
1530
1468
  delete self->set;
1531
1469
  }
@@ -1535,6 +1473,7 @@ void re2_set_free(re2_set *self) {
1535
1473
  static VALUE re2_set_allocate(VALUE klass) {
1536
1474
  re2_set *s;
1537
1475
  VALUE result = Data_Make_Struct(klass, re2_set, 0, re2_set_free, s);
1476
+
1538
1477
  return result;
1539
1478
  }
1540
1479
 
@@ -1582,18 +1521,16 @@ static VALUE re2_set_allocate(VALUE klass) {
1582
1521
  static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1583
1522
  VALUE anchor, options;
1584
1523
  re2_set *s;
1585
- RE2::Anchor re2_anchor;
1524
+ RE2::Anchor re2_anchor = RE2::UNANCHORED;
1586
1525
  RE2::Options re2_options;
1587
1526
 
1588
1527
  rb_scan_args(argc, argv, "02", &anchor, &options);
1589
1528
  Data_Get_Struct(self, re2_set, s);
1590
1529
 
1591
1530
  if (RTEST(options)) {
1592
- parse_re2_options(re2_options, options);
1531
+ parse_re2_options(&re2_options, options);
1593
1532
  }
1594
- if (NIL_P(anchor)) {
1595
- re2_anchor = RE2::UNANCHORED;
1596
- } else {
1533
+ if (!NIL_P(anchor)) {
1597
1534
  Check_Type(anchor, T_SYMBOL);
1598
1535
  ID id_anchor = SYM2ID(anchor);
1599
1536
  if (id_anchor == id_unanchored) {
@@ -1607,7 +1544,7 @@ static VALUE re2_set_initialize(int argc, VALUE *argv, VALUE self) {
1607
1544
  }
1608
1545
  }
1609
1546
 
1610
- s->set = new(nothrow) RE2::Set(re2_options, re2_anchor);
1547
+ s->set = new(std::nothrow) RE2::Set(re2_options, re2_anchor);
1611
1548
  if (s->set == 0) {
1612
1549
  rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Set object");
1613
1550
  }
@@ -1715,26 +1652,27 @@ static VALUE re2_set_match_raises_errors_p(VALUE self) {
1715
1652
  * set.compile
1716
1653
  * set.match("abcdef", :exception => true) # => [0, 1]
1717
1654
  */
1718
- static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1719
- VALUE str, options, exception_option;
1655
+ static VALUE re2_set_match(int argc, VALUE *argv, const VALUE self) {
1656
+ VALUE str, options;
1720
1657
  bool raise_exception = true;
1721
1658
  rb_scan_args(argc, argv, "11", &str, &options);
1722
1659
 
1723
1660
  StringValue(str);
1724
1661
  re2::StringPiece data(RSTRING_PTR(str), RSTRING_LEN(str));
1725
- std::vector<int> v;
1726
1662
  re2_set *s;
1727
1663
  Data_Get_Struct(self, re2_set, s);
1728
1664
 
1729
1665
  if (RTEST(options)) {
1730
1666
  Check_Type(options, T_HASH);
1731
1667
 
1732
- exception_option = rb_hash_aref(options, ID2SYM(id_exception));
1668
+ VALUE exception_option = rb_hash_aref(options, ID2SYM(id_exception));
1733
1669
  if (!NIL_P(exception_option)) {
1734
1670
  raise_exception = RTEST(exception_option);
1735
1671
  }
1736
1672
  }
1737
1673
 
1674
+ std::vector<int> v;
1675
+
1738
1676
  if (raise_exception) {
1739
1677
  #ifdef HAVE_ERROR_INFO_ARGUMENT
1740
1678
  RE2::Set::ErrorInfo e;
@@ -1755,7 +1693,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1755
1693
  rb_raise(re2_eSetMatchError, "Unknown RE2::Set::ErrorKind: %d", e.kind);
1756
1694
  }
1757
1695
  } else {
1758
- for (size_t i = 0; i < v.size(); i++) {
1696
+ for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
1759
1697
  rb_ary_push(result, INT2FIX(v[i]));
1760
1698
  }
1761
1699
  }
@@ -1769,7 +1707,7 @@ static VALUE re2_set_match(int argc, VALUE *argv, VALUE self) {
1769
1707
  VALUE result = rb_ary_new2(v.size());
1770
1708
 
1771
1709
  if (matched) {
1772
- for (size_t i = 0; i < v.size(); i++) {
1710
+ for (std::vector<int>::size_type i = 0; i < v.size(); ++i) {
1773
1711
  rb_ary_push(result, INT2FIX(v[i]));
1774
1712
  }
1775
1713
  }
@@ -1919,7 +1857,7 @@ void Init_re2(void) {
1919
1857
  rb_define_singleton_method(re2_cRegexp, "compile",
1920
1858
  RUBY_METHOD_FUNC(rb_class_new_instance), -1);
1921
1859
 
1922
- rb_define_global_function("RE2", RUBY_METHOD_FUNC(re2_re2), -1);
1860
+ rb_define_module_function(rb_mKernel, "RE2", RUBY_METHOD_FUNC(re2_re2), -1);
1923
1861
 
1924
1862
  /* Create the symbols used in options. */
1925
1863
  id_utf8 = rb_intern("utf8");
data/lib/2.6/re2.so CHANGED
Binary file
data/lib/2.7/re2.so CHANGED
Binary file
data/lib/3.0/re2.so CHANGED
Binary file
data/lib/3.1/re2.so CHANGED
Binary file
data/lib/3.2/re2.so CHANGED
Binary file
data/lib/re2/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RE2
4
- VERSION = "2.1.0"
4
+ VERSION = "2.1.1"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0
4
+ version: 2.1.1
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - Paul Mucur
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2023-09-16 00:00:00.000000000 Z
12
+ date: 2023-09-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake-compiler