re2 0.6.0.pre → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 65fc1ca01f0f974602264152462be26110297250
4
- data.tar.gz: e5fdd3a7c238d629d75d76032b557658f993553b
3
+ metadata.gz: dbde479e41f910dff23edbc656cbc6ab092c322c
4
+ data.tar.gz: c69628eea5d97fae581078584353b3125b06134d
5
5
  SHA512:
6
- metadata.gz: cbc9be6aef0659c89a13c0481b951467f009702e13cd4c6c426d4851c563ad16888959e2c878c79090e221909cd18f391b2bf3026b3cdaaf9d86317884e11c07
7
- data.tar.gz: 94b8ea6f3f3cf1b353fa82c70611e2623c93fea6c189f1bac02435c8bf3a5cacfde032ad95b5a855ea4e1c63f3f94a0b66b1174ad42a269e6e5d6ed4b9793df3
6
+ metadata.gz: fbceef56880f497c8d09da21123a12056ba5bc4c351bc7e49f88b657b323fc7e9d775bb1e48836222941954ffd356f6709348eb6d9b3f7bdfa0899b77e1d0e57
7
+ data.tar.gz: f5a1724a484d227cb9499611c285ceeb60fbf27e99b6cfe7a67c71eb3d7272745740eb79e25a5ba828cb25ec667cc51778429a26eca95b6569d94f814c0a48b3
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010-2013, Paul Mucur.
1
+ Copyright (c) 2010-2014, Paul Mucur.
2
2
  All rights reserved.
3
3
 
4
4
  Redistribution and use in source and binary forms, with or without
data/README.md CHANGED
@@ -90,51 +90,24 @@ As of 0.3.0, you can use named groups:
90
90
  => "40"
91
91
  ```
92
92
 
93
- As of 0.4.0, you can mix `RE2::String` into strings to provide helpers from
94
- the opposite direction:
95
-
96
- ```console
97
- > require "re2/string"
98
- > string = "My name is Robert Paulson"
99
- => "My name is Robert Paulson"
100
- > string.extend(RE2::String)
101
- => "My name is Robert Paulson"
102
- > string.re2_sub("Robert", "Dave")
103
- => "My name is Dave Paulson"
104
- > string.re2_gsub("a", "e")
105
- => "My neme is Deve Peulson"
106
- > string.re2_match('D(\S+)')
107
- => #<RE2::MatchData "Deve" 1:"eve">
108
- > string.re2_escape
109
- => "My\\ neme\\ is\\ Deve\\ Peulson"
110
- ```
111
-
112
- If you want these available to all strings, you can reopen `String` like so:
113
-
114
- ```ruby
115
- class String
116
- include RE2::String
117
- end
118
- ```
119
-
120
- As of 0.5.0, you can use `RE2::Regexp#consume` to incrementally scan text for
93
+ As of 0.6.0, you can use `RE2::Regexp#scan` to incrementally scan text for
121
94
  matches (similar in purpose to Ruby's
122
95
  [`String#scan`](http://ruby-doc.org/core-2.0.0/String.html#method-i-scan)).
123
- Calling `consume` will return an `RE2::Consumer` which is
96
+ Calling `scan` will return an `RE2::Scanner` which is
124
97
  [enumerable](http://ruby-doc.org/core-2.0.0/Enumerable.html) meaning you can
125
98
  use `each` to iterate through the matches (and even use
126
99
  [`Enumerator::Lazy`](http://ruby-doc.org/core-2.0/Enumerator/Lazy.html)):
127
100
 
128
101
  ```ruby
129
102
  re = RE2('(\w+)')
130
- consumer = re.consume("It is a truth universally acknowledged")
131
- consumer.each do |match|
103
+ scanner = re.scan("It is a truth universally acknowledged")
104
+ scanner.each do |match|
132
105
  puts match
133
106
  end
134
107
 
135
- consumer.rewind
108
+ scanner.rewind
136
109
 
137
- enum = consumer.to_enum
110
+ enum = scanner.to_enum
138
111
  enum.next #=> ["It"]
139
112
  enum.next #=> ["is"]
140
113
  ```
@@ -155,7 +128,7 @@ Features
155
128
  * Checking for matches with `re2 =~ text`, `re2 === text` (for use in `case`
156
129
  statements) and `re2 !~ text`
157
130
 
158
- * Incrementally scanning text with `re2.consume(text)`
131
+ * Incrementally scanning text with `re2.scan(text)`
159
132
 
160
133
  * Checking regular expression compilation with `re2.ok?`, `re2.error` and
161
134
  `re2.error_arg`
@@ -165,16 +138,15 @@ Features
165
138
  * Checking the options for an expression with `re2.options` or individually
166
139
  with `re2.case_sensitive?`
167
140
 
168
- * Performing in-place replacement with [`RE2.Replace(str, pattern,
169
- replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#335)
141
+ * Performing a single string replacement with `pattern.replace(replacement,
142
+ original)`
170
143
 
171
- * Performing in-place global replacement with [`RE2.GlobalReplace(str,
172
- pattern,
173
- replace)`](http://code.google.com/p/re2/source/browse/re2/re2.h#352)
144
+ * Performing a global string replacement with
145
+ `pattern.replace_all(replacement, original)`
174
146
 
175
147
  * Escaping regular expressions with
176
- [`RE2::Regexp.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377),
177
- `RE2::Regexp.quote(unquoted)` or `RE2.QuoteMeta(unquoted)`
148
+ [`RE2.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377) and
149
+ `RE2.quote(unquoted)`
178
150
 
179
151
  Contact
180
152
  -------
@@ -2,7 +2,7 @@
2
2
  * re2 (http://github.com/mudge/re2)
3
3
  * Ruby bindings to re2, an "efficient, principled regular expression library"
4
4
  *
5
- * Copyright (c) 2010-2013, Paul Mucur (http://mudge.name)
5
+ * Copyright (c) 2010-2014, Paul Mucur (http://mudge.name)
6
6
  * Released under the BSD Licence, please see LICENSE.txt
7
7
  */
8
8
 
@@ -22,14 +22,23 @@ extern "C" {
22
22
  #include <ruby/encoding.h>
23
23
  #define ENCODED_STR_NEW(str, length, encoding) \
24
24
  ({ \
25
- VALUE _string = rb_str_new((const char *)str, (long)length); \
26
- int _enc = rb_enc_find_index((int)encoding); \
27
- rb_enc_associate_index(_string, _enc); \
28
- _string; \
29
- })
25
+ VALUE _string = rb_str_new(str, length); \
26
+ int _enc = rb_enc_find_index(encoding); \
27
+ rb_enc_associate_index(_string, _enc); \
28
+ _string; \
29
+ })
30
+ #define ENCODED_STR_NEW2(str, length, str2) \
31
+ ({ \
32
+ VALUE _string = rb_str_new(str, length); \
33
+ int _enc = rb_enc_get_index(str2); \
34
+ rb_enc_associate_index(_string, _enc); \
35
+ _string; \
36
+ })
30
37
  #else
31
38
  #define ENCODED_STR_NEW(str, length, encoding) \
32
39
  rb_str_new((const char *)str, (long)length)
40
+ #define ENCODED_STR_NEW2(str, length, str2) \
41
+ rb_str_new((const char *)str, (long)length)
33
42
  #endif
34
43
 
35
44
  #define BOOL2RUBY(v) (v ? Qtrue : Qfalse)
@@ -62,12 +71,12 @@ extern "C" {
62
71
  } re2_matchdata;
63
72
 
64
73
  typedef struct {
65
- re2::StringPiece input;
66
- int argc;
74
+ re2::StringPiece *input;
75
+ int number_of_capturing_groups;
67
76
  VALUE regexp, text;
68
- } re2_consumer;
77
+ } re2_scanner;
69
78
 
70
- VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cConsumer;
79
+ VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner;
71
80
 
72
81
  /* Symbols used in RE2 options. */
73
82
  static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
@@ -86,12 +95,15 @@ extern "C" {
86
95
  free(self);
87
96
  }
88
97
 
89
- void re2_consumer_mark(re2_consumer* self) {
98
+ void re2_scanner_mark(re2_scanner* self) {
90
99
  rb_gc_mark(self->regexp);
91
100
  rb_gc_mark(self->text);
92
101
  }
93
102
 
94
- void re2_consumer_free(re2_consumer* self) {
103
+ void re2_scanner_free(re2_scanner* self) {
104
+ if (self->input) {
105
+ delete self->input;
106
+ }
95
107
  free(self);
96
108
  }
97
109
 
@@ -108,10 +120,10 @@ extern "C" {
108
120
  re2_matchdata_free, m);
109
121
  }
110
122
 
111
- static VALUE re2_consumer_allocate(VALUE klass) {
112
- re2_consumer *c;
113
- return Data_Make_Struct(klass, re2_consumer, re2_consumer_mark,
114
- re2_consumer_free, c);
123
+ static VALUE re2_scanner_allocate(VALUE klass) {
124
+ re2_scanner *c;
125
+ return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
126
+ re2_scanner_free, c);
115
127
  }
116
128
 
117
129
  /*
@@ -130,37 +142,36 @@ extern "C" {
130
142
  }
131
143
 
132
144
  /*
133
- * Returns the string passed into the consumer.
145
+ * Returns the string passed into the scanner.
134
146
  *
135
147
  * @return [String] the original string.
136
148
  * @example
137
- * c = RE2::Regexp.new('(\d+)').consume("foo")
149
+ * c = RE2::Regexp.new('(\d+)').scan("foo")
138
150
  * c.string #=> "foo"
139
151
  */
140
- static VALUE re2_consumer_string(VALUE self) {
141
- re2_consumer *c;
142
- Data_Get_Struct(self, re2_consumer, c);
152
+ static VALUE re2_scanner_string(VALUE self) {
153
+ re2_scanner *c;
154
+ Data_Get_Struct(self, re2_scanner, c);
143
155
 
144
156
  return c->text;
145
157
  }
146
158
 
147
159
  /*
148
- * Rewind the consumer to the start of the string.
160
+ * Rewind the scanner to the start of the string.
149
161
  *
150
162
  * @example
151
- * c = RE2::Regexp.new('(\d+)').consume("1 2 3")
152
- * e = c.to_enum
153
- * e.next #=> ["1"]
154
- * e.next #=> ["2"]
155
- * c.rewind
156
- * e.next #=> ["1"]
163
+ * s = RE2::Regexp.new('(\d+)').scan("1 2 3")
164
+ * e = s.to_enum
165
+ * e.next #=> ["1"]
166
+ * e.next #=> ["2"]
167
+ * s.rewind
168
+ * e.next #=> ["1"]
157
169
  */
158
- static VALUE re2_consumer_rewind(VALUE self) {
159
- re2_consumer *c;
160
- Data_Get_Struct(self, re2_consumer, c);
161
- re2::StringPiece input(RSTRING_PTR(c->text));
170
+ static VALUE re2_scanner_rewind(VALUE self) {
171
+ re2_scanner *c;
172
+ Data_Get_Struct(self, re2_scanner, c);
162
173
 
163
- c->input = input;
174
+ c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text));
164
175
 
165
176
  return self;
166
177
  }
@@ -171,31 +182,33 @@ extern "C" {
171
182
  *
172
183
  * @return [Array<String>] the matches.
173
184
  * @example
174
- * c = RE2::Regexp.new('(\w+)').consume("Foo bar baz")
175
- * c.consume #=> ["Foo"]
176
- * c.consume #=> ["bar"]
185
+ * s = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
186
+ * s.scan #=> ["Foo"]
187
+ * s.scan #=> ["bar"]
177
188
  */
178
- static VALUE re2_consumer_consume(VALUE self) {
189
+ static VALUE re2_scanner_scan(VALUE self) {
179
190
  int i;
180
191
  re2_pattern *p;
181
- re2_consumer *c;
192
+ re2_scanner *c;
182
193
  VALUE result;
183
194
 
184
- Data_Get_Struct(self, re2_consumer, c);
195
+ Data_Get_Struct(self, re2_scanner, c);
185
196
  Data_Get_Struct(c->regexp, re2_pattern, p);
186
197
 
187
- vector<RE2::Arg> argv(c->argc);
188
- vector<RE2::Arg*> args(c->argc);
189
- vector<string> matches(c->argc);
198
+ vector<RE2::Arg> argv(c->number_of_capturing_groups);
199
+ vector<RE2::Arg*> args(c->number_of_capturing_groups);
200
+ vector<string> matches(c->number_of_capturing_groups);
190
201
 
191
- for (i = 0; i < c->argc; i++) {
192
- args[i] = &argv[i];
202
+ for (i = 0; i < c->number_of_capturing_groups; i++) {
203
+ matches[i] = "";
193
204
  argv[i] = &matches[i];
205
+ args[i] = &argv[i];
194
206
  }
195
207
 
196
- if (RE2::FindAndConsumeN(&c->input, *p->pattern, &args[0], c->argc)) {
197
- result = rb_ary_new2(c->argc);
198
- for (i = 0; i < c->argc; i++) {
208
+ if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0],
209
+ c->number_of_capturing_groups)) {
210
+ result = rb_ary_new2(c->number_of_capturing_groups);
211
+ for (i = 0; i < c->number_of_capturing_groups; i++) {
199
212
  if (matches[i].empty()) {
200
213
  rb_ary_push(result, Qnil);
201
214
  } else {
@@ -242,16 +255,16 @@ extern "C" {
242
255
  }
243
256
 
244
257
  /*
245
- * Returns the {RE2::Regexp} used in the consumer.
258
+ * Returns the {RE2::Regexp} used in the scanner.
246
259
  *
247
- * @return [RE2::Regexp] the regexp used in the consumer
260
+ * @return [RE2::Regexp] the regexp used in the scanner
248
261
  * @example
249
- * c = RE2::Regexp.new('(\d+)').consume("bob 123")
262
+ * c = RE2::Regexp.new('(\d+)').scan("bob 123")
250
263
  * c.regexp #=> #<RE2::Regexp /(\d+)/>
251
264
  */
252
- static VALUE re2_consumer_regexp(VALUE self) {
253
- re2_consumer *c;
254
- Data_Get_Struct(self, re2_consumer, c);
265
+ static VALUE re2_scanner_regexp(VALUE self) {
266
+ re2_scanner *c;
267
+ Data_Get_Struct(self, re2_scanner, c);
255
268
 
256
269
  return c->regexp;
257
270
  }
@@ -1061,31 +1074,31 @@ extern "C" {
1061
1074
  }
1062
1075
 
1063
1076
  /*
1064
- * Returns a {RE2::Consumer} for scanning the given text incrementally.
1077
+ * Returns a {RE2::Scanner} for scanning the given text incrementally.
1065
1078
  *
1066
1079
  * @example
1067
- * c = RE2::Regexp.new('(\w+)').consume("Foo bar baz")
1080
+ * c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
1068
1081
  */
1069
- static VALUE re2_regexp_consume(VALUE self, VALUE text) {
1082
+ static VALUE re2_regexp_scan(VALUE self, VALUE text) {
1070
1083
  re2_pattern *p;
1071
- re2_consumer *c;
1072
- VALUE consumer;
1073
- re2::StringPiece input(RSTRING_PTR(text));
1084
+ re2_scanner *c;
1085
+ VALUE scanner;
1074
1086
 
1075
1087
  Data_Get_Struct(self, re2_pattern, p);
1076
- consumer = rb_class_new_instance(0, 0, re2_cConsumer);
1077
- Data_Get_Struct(consumer, re2_consumer, c);
1078
- c->input = input;
1088
+ scanner = rb_class_new_instance(0, 0, re2_cScanner);
1089
+ Data_Get_Struct(scanner, re2_scanner, c);
1090
+
1091
+ c->input = new(nothrow) re2::StringPiece(StringValuePtr(text));
1079
1092
  c->regexp = self;
1080
1093
  c->text = text;
1081
- c->argc = p->pattern->NumberOfCapturingGroups();
1094
+ c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
1082
1095
 
1083
- return consumer;
1096
+ return scanner;
1084
1097
  }
1085
1098
 
1086
1099
  /*
1087
- * Replaces the first occurrence +pattern+ in +str+ with
1088
- * +rewrite+ <i>in place</i>.
1100
+ * Returns a copy of +str+ with the first occurrence of +pattern+
1101
+ * replaced with +rewrite+.
1089
1102
  *
1090
1103
  * @param [String] str the string to modify
1091
1104
  * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
@@ -1095,18 +1108,10 @@ extern "C" {
1095
1108
  * RE2.Replace("hello there", "hello", "howdy") #=> "howdy there"
1096
1109
  * re2 = RE2.new("hel+o")
1097
1110
  * RE2.Replace("hello there", re2, "yo") #=> "yo there"
1098
- * text = "Good morning"
1099
- * RE2.Replace(text, "morn", "even") #=> "Good evening"
1100
- * text #=> "Good evening"
1101
1111
  */
1102
1112
  static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern,
1103
1113
  VALUE rewrite) {
1104
-
1105
- /* Look out for frozen strings. */
1106
- rb_check_frozen(str);
1107
-
1108
1114
  UNUSED(self);
1109
- VALUE repl;
1110
1115
  re2_pattern *p;
1111
1116
 
1112
1117
  /* Convert all the inputs to be pumped into RE2::Replace. */
@@ -1116,71 +1121,53 @@ extern "C" {
1116
1121
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1117
1122
  Data_Get_Struct(pattern, re2_pattern, p);
1118
1123
  RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1124
+
1125
+ return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1126
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
1119
1127
  } else {
1120
1128
  RE2::Replace(&str_as_string, StringValuePtr(pattern),
1121
1129
  StringValuePtr(rewrite));
1122
- }
1123
1130
 
1124
- /* Save the replacement as a VALUE. */
1125
- repl = rb_str_new(str_as_string.data(), str_as_string.size());
1126
-
1127
- /* Replace the original string with the replacement. */
1128
- if (RSTRING_LEN(str) != RSTRING_LEN(repl)) {
1129
- rb_str_resize(str, RSTRING_LEN(repl));
1131
+ return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(),
1132
+ pattern);
1130
1133
  }
1131
- memcpy(RSTRING_PTR(str), RSTRING_PTR(repl), RSTRING_LEN(repl));
1132
1134
 
1133
- return str;
1134
1135
  }
1135
1136
 
1136
1137
  /*
1137
- * Replaces every occurrence of +pattern+ in +str+ with
1138
- * +rewrite+ <i>in place</i>.
1138
+ * Returns a copy of +str+ with every occurrence of +pattern+ replaced by +rewrite+.
1139
1139
  *
1140
1140
  * @param [String] str the string to modify
1141
1141
  * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
1142
1142
  * @param [String] rewrite the string to replace with
1143
1143
  * @return [String] the resulting string
1144
1144
  * @example
1145
- * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
1146
1145
  * re2 = RE2.new("oo?")
1147
1146
  * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps"
1148
- * text = "Good morning"
1149
- * RE2.GlobalReplace(text, "o", "ee") #=> "Geeeed meerning"
1150
- * text #=> "Geeeed meerning"
1147
+ * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri"
1151
1148
  */
1152
1149
  static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern,
1153
1150
  VALUE rewrite) {
1154
-
1155
- /* Look out for frozen strings. */
1156
- rb_check_frozen(str);
1157
-
1158
1151
  UNUSED(self);
1159
1152
 
1160
1153
  /* Convert all the inputs to be pumped into RE2::GlobalReplace. */
1161
1154
  re2_pattern *p;
1162
1155
  string str_as_string(StringValuePtr(str));
1163
- VALUE repl;
1164
1156
 
1165
1157
  /* Do the replacement. */
1166
1158
  if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
1167
1159
  Data_Get_Struct(pattern, re2_pattern, p);
1168
1160
  RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
1161
+
1162
+ return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(),
1163
+ p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
1169
1164
  } else {
1170
1165
  RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern),
1171
1166
  StringValuePtr(rewrite));
1172
- }
1173
1167
 
1174
- /* Save the replacement as a VALUE. */
1175
- repl = rb_str_new(str_as_string.data(), str_as_string.size());
1176
-
1177
- /* Replace the original string with the replacement. */
1178
- if (RSTRING_LEN(str) != RSTRING_LEN(repl)) {
1179
- rb_str_resize(str, RSTRING_LEN(repl));
1168
+ return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(),
1169
+ pattern);
1180
1170
  }
1181
- memcpy(RSTRING_PTR(str), RSTRING_PTR(repl), RSTRING_LEN(repl));
1182
-
1183
- return str;
1184
1171
  }
1185
1172
 
1186
1173
  /*
@@ -1203,13 +1190,13 @@ extern "C" {
1203
1190
  re2_mRE2 = rb_define_module("RE2");
1204
1191
  re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
1205
1192
  re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
1206
- re2_cConsumer = rb_define_class_under(re2_mRE2, "Consumer", rb_cObject);
1193
+ re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);
1207
1194
 
1208
1195
  rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
1209
1196
  rb_define_alloc_func(re2_cMatchData,
1210
1197
  (VALUE (*)(VALUE))re2_matchdata_allocate);
1211
- rb_define_alloc_func(re2_cConsumer,
1212
- (VALUE (*)(VALUE))re2_consumer_allocate);
1198
+ rb_define_alloc_func(re2_cScanner,
1199
+ (VALUE (*)(VALUE))re2_scanner_allocate);
1213
1200
 
1214
1201
  rb_define_method(re2_cMatchData, "string",
1215
1202
  RUBY_METHOD_FUNC(re2_matchdata_string), 0);
@@ -1227,14 +1214,14 @@ extern "C" {
1227
1214
  rb_define_method(re2_cMatchData, "inspect",
1228
1215
  RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);
1229
1216
 
1230
- rb_define_method(re2_cConsumer, "string",
1231
- RUBY_METHOD_FUNC(re2_consumer_string), 0);
1232
- rb_define_method(re2_cConsumer, "regexp",
1233
- RUBY_METHOD_FUNC(re2_consumer_regexp), 0);
1234
- rb_define_method(re2_cConsumer, "consume",
1235
- RUBY_METHOD_FUNC(re2_consumer_consume), 0);
1236
- rb_define_method(re2_cConsumer, "rewind",
1237
- RUBY_METHOD_FUNC(re2_consumer_rewind), 0);
1217
+ rb_define_method(re2_cScanner, "string",
1218
+ RUBY_METHOD_FUNC(re2_scanner_string), 0);
1219
+ rb_define_method(re2_cScanner, "regexp",
1220
+ RUBY_METHOD_FUNC(re2_scanner_regexp), 0);
1221
+ rb_define_method(re2_cScanner, "scan",
1222
+ RUBY_METHOD_FUNC(re2_scanner_scan), 0);
1223
+ rb_define_method(re2_cScanner, "rewind",
1224
+ RUBY_METHOD_FUNC(re2_scanner_rewind), 0);
1238
1225
 
1239
1226
  rb_define_method(re2_cRegexp, "initialize",
1240
1227
  RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
@@ -1259,7 +1246,8 @@ extern "C" {
1259
1246
  RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1260
1247
  rb_define_method(re2_cRegexp, "===",
1261
1248
  RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
1262
- rb_define_method(re2_cRegexp, "consume", RUBY_METHOD_FUNC(re2_regexp_consume), 1);
1249
+ rb_define_method(re2_cRegexp, "scan",
1250
+ RUBY_METHOD_FUNC(re2_regexp_scan), 1);
1263
1251
  rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
1264
1252
  rb_define_method(re2_cRegexp, "to_str", RUBY_METHOD_FUNC(re2_regexp_to_s),
1265
1253
  0);
data/lib/re2.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # re2 (http://github.com/mudge/re2)
2
2
  # Ruby bindings to re2, an "efficient, principled regular expression library"
3
3
  #
4
- # Copyright (c) 2010-2013, Paul Mucur (http://mudge.name)
4
+ # Copyright (c) 2010-2014, Paul Mucur (http://mudge.name)
5
5
  # Released under the BSD Licence, please see LICENSE.txt
6
6
  require "re2.so"
7
- require "re2/consumer"
7
+ require "re2/scanner"
@@ -1,10 +1,10 @@
1
1
  module RE2
2
- class Consumer
2
+ class Scanner
3
3
  include Enumerable
4
4
 
5
5
  def each
6
6
  if block_given?
7
- while matches = consume
7
+ while matches = scan
8
8
  yield matches
9
9
  end
10
10
  else
@@ -1,7 +1,7 @@
1
1
  # re2 (http://github.com/mudge/re2)
2
2
  # Ruby bindings to re2, an "efficient, principled regular expression library"
3
3
  #
4
- # Copyright (c) 2010-2013, Paul Mucur (http://mudge.name)
4
+ # Copyright (c) 2010-2014, Paul Mucur (http://mudge.name)
5
5
  # Released under the BSD Licence, please see LICENSE.txt
6
6
 
7
7
  require "re2"
@@ -9,21 +9,6 @@ require "re2"
9
9
  module RE2
10
10
  module String
11
11
 
12
- # Replaces the first occurrence +pattern+ with +rewrite+ <i>in place</i>.
13
- #
14
- # @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
15
- # @param [String] rewrite the string to replace with
16
- # @example
17
- # "hello there".re2_sub!("hello", "howdy") #=> "howdy there"
18
- # re2 = RE2.new("hel+o")
19
- # "hello there".re2_sub!(re2, "yo") #=> "yo there"
20
- # text = "Good morning"
21
- # text.re2_sub!("morn", "even") #=> "Good evening"
22
- # text #=> "Good evening"
23
- def re2_sub!(*args)
24
- RE2.Replace(self, *args)
25
- end
26
-
27
12
  # Replaces the first occurrence of +pattern+ with +rewrite+ and returns a new
28
13
  # string.
29
14
  #
@@ -37,22 +22,7 @@ module RE2
37
22
  # text.re2_sub("morn", "even") #=> "Good evening"
38
23
  # text #=> "Good morning"
39
24
  def re2_sub(*args)
40
- dup.re2_sub!(*args)
41
- end
42
-
43
- # Replaces every occurrence of +pattern+ with +rewrite+ <i>in place</i>.
44
- #
45
- # @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
46
- # @param [String] rewrite the string to replace with
47
- # @example
48
- # "hello there".re2_gsub!("e", "i") #=> "hillo thiri"
49
- # re2 = RE2.new("oo?")
50
- # "whoops-doops".re2_gsub!(re2, "e") #=> "wheps-deps"
51
- # text = "Good morning"
52
- # text.re2_gsub!("o", "ee") #=> "Geeeed meerning"
53
- # text #=> "Geeeed meerning"
54
- def re2_gsub!(*args)
55
- RE2.GlobalReplace(self, *args)
25
+ RE2.Replace(self, *args)
56
26
  end
57
27
 
58
28
  # Replaces every occurrence of +pattern+ with +rewrite+ and returns a new string.
@@ -67,7 +37,7 @@ module RE2
67
37
  # text.re2_gsub("o", "ee") #=> "Geeeed meerning"
68
38
  # text #=> "Good morning"
69
39
  def re2_gsub(*args)
70
- dup.re2_gsub!(*args)
40
+ RE2.GlobalReplace(self, *args)
71
41
  end
72
42
 
73
43
  # Match the pattern and return either a boolean (if no submatches are required)
@@ -392,12 +392,12 @@ describe RE2::Regexp do
392
392
  end
393
393
  end
394
394
 
395
- describe "#consume" do
396
- it "returns a consumer" do
395
+ describe "#scan" do
396
+ it "returns a scanner" do
397
397
  r = RE2::Regexp.new('(\w+)')
398
- consumer = r.consume("It is a truth universally acknowledged")
398
+ scanner = r.scan("It is a truth universally acknowledged")
399
399
 
400
- consumer.must_be_instance_of(RE2::Consumer)
400
+ scanner.must_be_instance_of(RE2::Scanner)
401
401
  end
402
402
  end
403
403
  end
@@ -0,0 +1,90 @@
1
+ require "spec_helper"
2
+
3
+ describe RE2::Scanner do
4
+ describe "#regexp" do
5
+ it "returns the original pattern for the scanner" do
6
+ re = RE2::Regexp.new('(\w+)')
7
+ scanner = re.scan("It is a truth")
8
+
9
+ scanner.regexp.must_be_same_as(re)
10
+ end
11
+ end
12
+
13
+ describe "#string" do
14
+ it "returns the original text for the scanner" do
15
+ re = RE2::Regexp.new('(\w+)')
16
+ text = "It is a truth"
17
+ scanner = re.scan(text)
18
+
19
+ scanner.string.must_be_same_as(text)
20
+ end
21
+ end
22
+
23
+ describe "#scan" do
24
+ it "returns the next array of matches" do
25
+ r = RE2::Regexp.new('(\w+)')
26
+ scanner = r.scan("It is a truth universally acknowledged")
27
+ scanner.scan.must_equal(["It"])
28
+ scanner.scan.must_equal(["is"])
29
+ scanner.scan.must_equal(["a"])
30
+ scanner.scan.must_equal(["truth"])
31
+ scanner.scan.must_equal(["universally"])
32
+ scanner.scan.must_equal(["acknowledged"])
33
+ scanner.scan.must_be_nil
34
+ end
35
+
36
+ it "returns an empty array if there are no capturing groups" do
37
+ r = RE2::Regexp.new('\w+')
38
+ scanner = r.scan("Foo bar")
39
+ scanner.scan.must_equal([])
40
+ end
41
+
42
+ it "returns nil if there is no match" do
43
+ r = RE2::Regexp.new('\d+')
44
+ scanner = r.scan("Foo bar")
45
+ scanner.scan.must_be_nil
46
+ end
47
+ end
48
+
49
+ it "is enumerable" do
50
+ r = RE2::Regexp.new('(\d)')
51
+ scanner = r.scan("There are 1 some 2 numbers 3")
52
+ scanner.must_be_kind_of(Enumerable)
53
+ end
54
+
55
+ describe "#each" do
56
+ it "yields each match" do
57
+ r = RE2::Regexp.new('(\d)')
58
+ scanner = r.scan("There are 1 some 2 numbers 3")
59
+ matches = []
60
+ scanner.each do |match|
61
+ matches << match
62
+ end
63
+
64
+ matches.must_equal([["1"], ["2"], ["3"]])
65
+ end
66
+
67
+ it "returns an enumerator when not given a block" do
68
+ r = RE2::Regexp.new('(\d)')
69
+ scanner = r.scan("There are 1 some 2 numbers 3")
70
+
71
+ # Prior to Ruby 1.9, Enumerator was within Enumerable.
72
+ if defined?(Enumerator)
73
+ scanner.each.must_be_kind_of(Enumerator)
74
+ elsif defined?(Enumerable::Enumerator)
75
+ scanner.each.must_be_kind_of(Enumerable::Enumerator)
76
+ end
77
+ end
78
+ end
79
+
80
+ describe "#rewind" do
81
+ it "resets any consumption" do
82
+ r = RE2::Regexp.new('(\d)')
83
+ scanner = r.scan("There are 1 some 2 numbers 3")
84
+ scanner.to_enum.first.must_equal(["1"])
85
+ scanner.to_enum.first.must_equal(["2"])
86
+ scanner.rewind
87
+ scanner.to_enum.first.must_equal(["1"])
88
+ end
89
+ end
90
+ end
@@ -6,28 +6,6 @@ class String
6
6
  end
7
7
 
8
8
  describe RE2::String do
9
- describe "#re2_sub!" do
10
- it "delegates to RE2.Replace to perform replacement" do
11
- "My name is Robert Paulson".re2_sub!('Robert', 'Crobert').must_equal("My name is Crobert Paulson")
12
- end
13
-
14
- it "does perform an in-place replacement" do
15
- string = "My name is Robert Paulson"
16
- string.re2_sub!('Robert', 'Crobert').must_be_same_as(string)
17
- end
18
- end
19
-
20
- describe "#re2_gsub!" do
21
- it "delegates to RE2.GlobalReplace to perform replacement" do
22
- "My name is Robert Paulson".re2_gsub!('a', 'e').must_equal("My neme is Robert Peulson")
23
- end
24
-
25
- it "does perform an in-place replacement" do
26
- string = "My name is Robert Paulson"
27
- string.re2_gsub!('a', 'e').must_be_same_as(string)
28
- end
29
- end
30
-
31
9
  describe "#re2_sub" do
32
10
  it "delegates to RE2.Replace to perform replacement" do
33
11
  "My name is Robert Paulson".re2_sub('Robert', 'Crobert').must_equal("My name is Crobert Paulson")
@@ -14,11 +14,11 @@ describe RE2 do
14
14
  RE2.Replace("Good morning", "(?i)gOOD MORNING", "hi").must_equal("hi")
15
15
  end
16
16
 
17
- it "performs replacements in-place" do
17
+ it "does not perform replacements in-place" do
18
18
  name = "Robert"
19
19
  replacement = RE2.Replace(name, "R", "Cr")
20
20
  replacement.must_equal("Crobert")
21
- name.must_be_same_as(replacement)
21
+ name.wont_be_same_as(replacement)
22
22
  end
23
23
 
24
24
  it "supports passing an RE2::Regexp as the pattern" do
@@ -31,12 +31,6 @@ describe RE2 do
31
31
  RE2.Replace("Good morning", re, "hi").must_equal("hi")
32
32
  end
33
33
 
34
- it "raises an error if the string is frozen" do
35
- frozen_name = "Arnold".freeze
36
-
37
- proc { RE2.Replace(frozen_name, "o", "a") }.must_raise(TypeError, RuntimeError)
38
- end
39
-
40
34
  if String.method_defined?(:encoding)
41
35
  it "preserves the original string's encoding" do
42
36
  original = "Foo"
@@ -59,11 +53,11 @@ describe RE2 do
59
53
  RE2.GlobalReplace("Robert", "(?i)r", "w").must_equal("wobewt")
60
54
  end
61
55
 
62
- it "performs replacement in-place" do
56
+ it "does not perform replacement in-place" do
63
57
  name = "Robert"
64
58
  replacement = RE2.GlobalReplace(name, "(?i)R", "w")
65
59
  replacement.must_equal("wobewt")
66
- name.must_be_same_as(replacement)
60
+ name.wont_be_same_as(replacement)
67
61
  end
68
62
 
69
63
  it "supports passing an RE2::Regexp as the pattern" do
@@ -75,12 +69,6 @@ describe RE2 do
75
69
  re = RE2::Regexp.new('gOOD MORNING', :case_sensitive => false)
76
70
  RE2.GlobalReplace("Good morning Good morning", re, "hi").must_equal("hi hi")
77
71
  end
78
-
79
- it "raises an error if the string is frozen" do
80
- frozen_name = "Arnold".freeze
81
-
82
- proc { RE2.GlobalReplace(frozen_name, "o", "a") }.must_raise(TypeError, RuntimeError)
83
- end
84
72
  end
85
73
 
86
74
  describe "#QuoteMeta" do
metadata CHANGED
@@ -1,41 +1,41 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0.pre
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Mucur
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-15 00:00:00.000000000 Z
11
+ date: 2014-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: minitest
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  description: Ruby bindings to re2, "an efficient, principled regular expression library".
@@ -45,23 +45,24 @@ extensions:
45
45
  - ext/re2/extconf.rb
46
46
  extra_rdoc_files: []
47
47
  files:
48
+ - LICENSE.txt
49
+ - README.md
50
+ - Rakefile
48
51
  - ext/re2/extconf.rb
49
52
  - ext/re2/re2.cc
50
53
  - lib/re2.rb
51
- - lib/re2/consumer.rb
54
+ - lib/re2/scanner.rb
52
55
  - lib/re2/string.rb
53
- - LICENSE.txt
54
- - README.md
55
- - Rakefile
56
- - spec/spec_helper.rb
57
- - spec/re2_spec.rb
58
56
  - spec/kernel_spec.rb
59
- - spec/re2/regexp_spec.rb
60
57
  - spec/re2/match_data_spec.rb
58
+ - spec/re2/regexp_spec.rb
59
+ - spec/re2/scanner_spec.rb
61
60
  - spec/re2/string_spec.rb
62
- - spec/re2/consumer_spec.rb
61
+ - spec/re2_spec.rb
62
+ - spec/spec_helper.rb
63
63
  homepage: http://github.com/mudge/re2
64
- licenses: []
64
+ licenses:
65
+ - BSD
65
66
  metadata: {}
66
67
  post_install_message:
67
68
  rdoc_options: []
@@ -69,17 +70,17 @@ require_paths:
69
70
  - lib
70
71
  required_ruby_version: !ruby/object:Gem::Requirement
71
72
  requirements:
72
- - - '>='
73
+ - - ">="
73
74
  - !ruby/object:Gem::Version
74
75
  version: '0'
75
76
  required_rubygems_version: !ruby/object:Gem::Requirement
76
77
  requirements:
77
- - - '>'
78
+ - - ">="
78
79
  - !ruby/object:Gem::Version
79
- version: 1.3.1
80
+ version: '0'
80
81
  requirements: []
81
82
  rubyforge_project:
82
- rubygems_version: 2.0.3
83
+ rubygems_version: 2.2.0
83
84
  signing_key:
84
85
  specification_version: 4
85
86
  summary: Ruby bindings to re2.
@@ -90,4 +91,4 @@ test_files:
90
91
  - spec/re2/regexp_spec.rb
91
92
  - spec/re2/match_data_spec.rb
92
93
  - spec/re2/string_spec.rb
93
- - spec/re2/consumer_spec.rb
94
+ - spec/re2/scanner_spec.rb
@@ -1,90 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe RE2::Consumer do
4
- describe "#regexp" do
5
- it "returns the original pattern for the consumer" do
6
- re = RE2::Regexp.new('(\w+)')
7
- consumer = re.consume("It is a truth")
8
-
9
- consumer.regexp.must_be_same_as(re)
10
- end
11
- end
12
-
13
- describe "#string" do
14
- it "returns the original text for the consumer" do
15
- re = RE2::Regexp.new('(\w+)')
16
- text = "It is a truth"
17
- consumer = re.consume(text)
18
-
19
- consumer.string.must_be_same_as(text)
20
- end
21
- end
22
-
23
- describe "#consume" do
24
- it "returns the next array of matches" do
25
- r = RE2::Regexp.new('(\w+)')
26
- consumer = r.consume("It is a truth universally acknowledged")
27
- consumer.consume.must_equal(["It"])
28
- consumer.consume.must_equal(["is"])
29
- consumer.consume.must_equal(["a"])
30
- consumer.consume.must_equal(["truth"])
31
- consumer.consume.must_equal(["universally"])
32
- consumer.consume.must_equal(["acknowledged"])
33
- consumer.consume.must_be_nil
34
- end
35
-
36
- it "returns an empty array if there are no capturing groups" do
37
- r = RE2::Regexp.new('\w+')
38
- consumer = r.consume("Foo bar")
39
- consumer.consume.must_equal([])
40
- end
41
-
42
- it "returns nil if there is no match" do
43
- r = RE2::Regexp.new('\d+')
44
- consumer = r.consume("Foo bar")
45
- consumer.consume.must_be_nil
46
- end
47
- end
48
-
49
- it "is enumerable" do
50
- r = RE2::Regexp.new('(\d)')
51
- consumer = r.consume("There are 1 some 2 numbers 3")
52
- consumer.must_be_kind_of(Enumerable)
53
- end
54
-
55
- describe "#each" do
56
- it "yields each match" do
57
- r = RE2::Regexp.new('(\d)')
58
- consumer = r.consume("There are 1 some 2 numbers 3")
59
- matches = []
60
- consumer.each do |match|
61
- matches << match
62
- end
63
-
64
- matches.must_equal([["1"], ["2"], ["3"]])
65
- end
66
-
67
- it "returns an enumerator when not given a block" do
68
- r = RE2::Regexp.new('(\d)')
69
- consumer = r.consume("There are 1 some 2 numbers 3")
70
-
71
- # Prior to Ruby 1.9, Enumerator was within Enumerable.
72
- if defined?(Enumerator)
73
- consumer.each.must_be_kind_of(Enumerator)
74
- elsif defined?(Enumerable::Enumerator)
75
- consumer.each.must_be_kind_of(Enumerable::Enumerator)
76
- end
77
- end
78
- end
79
-
80
- describe "#rewind" do
81
- it "resets any consumption" do
82
- r = RE2::Regexp.new('(\d)')
83
- consumer = r.consume("There are 1 some 2 numbers 3")
84
- consumer.to_enum.first.must_equal(["1"])
85
- consumer.to_enum.first.must_equal(["2"])
86
- consumer.rewind
87
- consumer.to_enum.first.must_equal(["1"])
88
- end
89
- end
90
- end