grpc 1.31.0.pre1 → 1.31.0.pre2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of grpc might be problematic. Click here for more details.

Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/Makefile +2 -2
  3. data/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc +3 -4
  4. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_routing.cc +5 -4
  5. data/src/ruby/lib/grpc/version.rb +1 -1
  6. data/third_party/re2/re2/bitmap256.h +117 -0
  7. data/third_party/re2/re2/bitstate.cc +385 -0
  8. data/third_party/re2/re2/compile.cc +1279 -0
  9. data/third_party/re2/re2/dfa.cc +2130 -0
  10. data/third_party/re2/re2/filtered_re2.cc +121 -0
  11. data/third_party/re2/re2/filtered_re2.h +109 -0
  12. data/third_party/re2/re2/mimics_pcre.cc +197 -0
  13. data/third_party/re2/re2/nfa.cc +713 -0
  14. data/third_party/re2/re2/onepass.cc +623 -0
  15. data/third_party/re2/re2/parse.cc +2464 -0
  16. data/third_party/re2/re2/perl_groups.cc +119 -0
  17. data/third_party/re2/re2/pod_array.h +55 -0
  18. data/third_party/re2/re2/prefilter.cc +710 -0
  19. data/third_party/re2/re2/prefilter.h +108 -0
  20. data/third_party/re2/re2/prefilter_tree.cc +407 -0
  21. data/third_party/re2/re2/prefilter_tree.h +139 -0
  22. data/third_party/re2/re2/prog.cc +988 -0
  23. data/third_party/re2/re2/prog.h +436 -0
  24. data/third_party/re2/re2/re2.cc +1362 -0
  25. data/third_party/re2/re2/re2.h +1002 -0
  26. data/third_party/re2/re2/regexp.cc +980 -0
  27. data/third_party/re2/re2/regexp.h +659 -0
  28. data/third_party/re2/re2/set.cc +154 -0
  29. data/third_party/re2/re2/set.h +80 -0
  30. data/third_party/re2/re2/simplify.cc +657 -0
  31. data/third_party/re2/re2/sparse_array.h +392 -0
  32. data/third_party/re2/re2/sparse_set.h +264 -0
  33. data/third_party/re2/re2/stringpiece.cc +65 -0
  34. data/third_party/re2/re2/stringpiece.h +210 -0
  35. data/third_party/re2/re2/tostring.cc +351 -0
  36. data/third_party/re2/re2/unicode_casefold.cc +582 -0
  37. data/third_party/re2/re2/unicode_casefold.h +78 -0
  38. data/third_party/re2/re2/unicode_groups.cc +6269 -0
  39. data/third_party/re2/re2/unicode_groups.h +67 -0
  40. data/third_party/re2/re2/walker-inl.h +246 -0
  41. data/third_party/re2/util/benchmark.h +156 -0
  42. data/third_party/re2/util/flags.h +26 -0
  43. data/third_party/re2/util/logging.h +109 -0
  44. data/third_party/re2/util/malloc_counter.h +19 -0
  45. data/third_party/re2/util/mix.h +41 -0
  46. data/third_party/re2/util/mutex.h +148 -0
  47. data/third_party/re2/util/pcre.cc +1025 -0
  48. data/third_party/re2/util/pcre.h +681 -0
  49. data/third_party/re2/util/rune.cc +260 -0
  50. data/third_party/re2/util/strutil.cc +149 -0
  51. data/third_party/re2/util/strutil.h +21 -0
  52. data/third_party/re2/util/test.h +50 -0
  53. data/third_party/re2/util/utf.h +44 -0
  54. data/third_party/re2/util/util.h +42 -0
  55. metadata +78 -29
@@ -0,0 +1,119 @@
1
+ // GENERATED BY make_perl_groups.pl; DO NOT EDIT.
2
+ // make_perl_groups.pl >perl_groups.cc
3
+
4
+ #include "re2/unicode_groups.h"
5
+
6
+ namespace re2 {
7
+
8
+ static const URange16 code1[] = { /* \d */
9
+ { 0x30, 0x39 },
10
+ };
11
+ static const URange16 code2[] = { /* \s */
12
+ { 0x9, 0xa },
13
+ { 0xc, 0xd },
14
+ { 0x20, 0x20 },
15
+ };
16
+ static const URange16 code3[] = { /* \w */
17
+ { 0x30, 0x39 },
18
+ { 0x41, 0x5a },
19
+ { 0x5f, 0x5f },
20
+ { 0x61, 0x7a },
21
+ };
22
+ const UGroup perl_groups[] = {
23
+ { "\\d", +1, code1, 1 },
24
+ { "\\D", -1, code1, 1 },
25
+ { "\\s", +1, code2, 3 },
26
+ { "\\S", -1, code2, 3 },
27
+ { "\\w", +1, code3, 4 },
28
+ { "\\W", -1, code3, 4 },
29
+ };
30
+ const int num_perl_groups = 6;
31
+ static const URange16 code4[] = { /* [:alnum:] */
32
+ { 0x30, 0x39 },
33
+ { 0x41, 0x5a },
34
+ { 0x61, 0x7a },
35
+ };
36
+ static const URange16 code5[] = { /* [:alpha:] */
37
+ { 0x41, 0x5a },
38
+ { 0x61, 0x7a },
39
+ };
40
+ static const URange16 code6[] = { /* [:ascii:] */
41
+ { 0x0, 0x7f },
42
+ };
43
+ static const URange16 code7[] = { /* [:blank:] */
44
+ { 0x9, 0x9 },
45
+ { 0x20, 0x20 },
46
+ };
47
+ static const URange16 code8[] = { /* [:cntrl:] */
48
+ { 0x0, 0x1f },
49
+ { 0x7f, 0x7f },
50
+ };
51
+ static const URange16 code9[] = { /* [:digit:] */
52
+ { 0x30, 0x39 },
53
+ };
54
+ static const URange16 code10[] = { /* [:graph:] */
55
+ { 0x21, 0x7e },
56
+ };
57
+ static const URange16 code11[] = { /* [:lower:] */
58
+ { 0x61, 0x7a },
59
+ };
60
+ static const URange16 code12[] = { /* [:print:] */
61
+ { 0x20, 0x7e },
62
+ };
63
+ static const URange16 code13[] = { /* [:punct:] */
64
+ { 0x21, 0x2f },
65
+ { 0x3a, 0x40 },
66
+ { 0x5b, 0x60 },
67
+ { 0x7b, 0x7e },
68
+ };
69
+ static const URange16 code14[] = { /* [:space:] */
70
+ { 0x9, 0xd },
71
+ { 0x20, 0x20 },
72
+ };
73
+ static const URange16 code15[] = { /* [:upper:] */
74
+ { 0x41, 0x5a },
75
+ };
76
+ static const URange16 code16[] = { /* [:word:] */
77
+ { 0x30, 0x39 },
78
+ { 0x41, 0x5a },
79
+ { 0x5f, 0x5f },
80
+ { 0x61, 0x7a },
81
+ };
82
+ static const URange16 code17[] = { /* [:xdigit:] */
83
+ { 0x30, 0x39 },
84
+ { 0x41, 0x46 },
85
+ { 0x61, 0x66 },
86
+ };
87
+ const UGroup posix_groups[] = {
88
+ { "[:alnum:]", +1, code4, 3 },
89
+ { "[:^alnum:]", -1, code4, 3 },
90
+ { "[:alpha:]", +1, code5, 2 },
91
+ { "[:^alpha:]", -1, code5, 2 },
92
+ { "[:ascii:]", +1, code6, 1 },
93
+ { "[:^ascii:]", -1, code6, 1 },
94
+ { "[:blank:]", +1, code7, 2 },
95
+ { "[:^blank:]", -1, code7, 2 },
96
+ { "[:cntrl:]", +1, code8, 2 },
97
+ { "[:^cntrl:]", -1, code8, 2 },
98
+ { "[:digit:]", +1, code9, 1 },
99
+ { "[:^digit:]", -1, code9, 1 },
100
+ { "[:graph:]", +1, code10, 1 },
101
+ { "[:^graph:]", -1, code10, 1 },
102
+ { "[:lower:]", +1, code11, 1 },
103
+ { "[:^lower:]", -1, code11, 1 },
104
+ { "[:print:]", +1, code12, 1 },
105
+ { "[:^print:]", -1, code12, 1 },
106
+ { "[:punct:]", +1, code13, 4 },
107
+ { "[:^punct:]", -1, code13, 4 },
108
+ { "[:space:]", +1, code14, 2 },
109
+ { "[:^space:]", -1, code14, 2 },
110
+ { "[:upper:]", +1, code15, 1 },
111
+ { "[:^upper:]", -1, code15, 1 },
112
+ { "[:word:]", +1, code16, 4 },
113
+ { "[:^word:]", -1, code16, 4 },
114
+ { "[:xdigit:]", +1, code17, 3 },
115
+ { "[:^xdigit:]", -1, code17, 3 },
116
+ };
117
+ const int num_posix_groups = 28;
118
+
119
+ } // namespace re2
@@ -0,0 +1,55 @@
1
+ // Copyright 2018 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #ifndef RE2_POD_ARRAY_H_
6
+ #define RE2_POD_ARRAY_H_
7
+
8
+ #include <memory>
9
+ #include <type_traits>
10
+
11
+ namespace re2 {
12
+
13
// A fixed-length buffer of POD elements obtained from std::allocator<T>.
// The element count travels with the deleter so that deallocate() is
// handed the same length that allocate() was called with. Elements are
// not initialized by the constructor.
template <typename T>
class PODArray {
 public:
  static_assert(std::is_trivial<T>::value && std::is_standard_layout<T>::value,
                "T must be POD");

  // An empty array: no storage, size() == 0.
  PODArray() : ptr_() {}

  // Allocates (but does not initialize) room for len elements.
  explicit PODArray(int len)
      : ptr_(std::allocator<T>().allocate(len), Deleter(len)) {}

  // Pointer to the first element, or null for a default-constructed array.
  T* data() const { return ptr_.get(); }

  // Number of elements requested at construction time.
  int size() const {
    const Deleter& d = ptr_.get_deleter();
    return d.len_;
  }

  // Unchecked element access.
  T& operator[](int pos) const { return ptr_[pos]; }

 private:
  // Releases the buffer through std::allocator<T>, remembering its length.
  struct Deleter {
    Deleter() : len_(0) {}
    explicit Deleter(int len) : len_(len) {}

    void operator()(T* ptr) const {
      std::allocator<T> alloc;
      alloc.deallocate(ptr, len_);
    }

    int len_;
  };

  std::unique_ptr<T[], Deleter> ptr_;
};
52
+
53
+ } // namespace re2
54
+
55
+ #endif // RE2_POD_ARRAY_H_
@@ -0,0 +1,710 @@
1
+ // Copyright 2009 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #include "re2/prefilter.h"
6
+
7
+ #include <stddef.h>
8
+ #include <stdint.h>
9
+ #include <string>
10
+ #include <vector>
11
+
12
+ #include "util/util.h"
13
+ #include "util/logging.h"
14
+ #include "util/strutil.h"
15
+ #include "util/utf.h"
16
+ #include "re2/re2.h"
17
+ #include "re2/unicode_casefold.h"
18
+ #include "re2/walker-inl.h"
19
+
20
+ namespace re2 {
21
+
22
+ static const bool ExtraDebug = false;
23
+
24
+ typedef std::set<std::string>::iterator SSIter;
25
+ typedef std::set<std::string>::const_iterator ConstSSIter;
26
+
27
+ // Initializes a Prefilter, allocating subs_ as necessary.
28
+ Prefilter::Prefilter(Op op) {
29
+ op_ = op;
30
+ subs_ = NULL;
31
+ if (op_ == AND || op_ == OR)
32
+ subs_ = new std::vector<Prefilter*>;
33
+ }
34
+
35
+ // Destroys a Prefilter.
36
+ Prefilter::~Prefilter() {
37
+ if (subs_) {
38
+ for (size_t i = 0; i < subs_->size(); i++)
39
+ delete (*subs_)[i];
40
+ delete subs_;
41
+ subs_ = NULL;
42
+ }
43
+ }
44
+
45
+ // Simplify if the node is an empty Or or And.
46
+ Prefilter* Prefilter::Simplify() {
47
+ if (op_ != AND && op_ != OR) {
48
+ return this;
49
+ }
50
+
51
+ // Nothing left in the AND/OR.
52
+ if (subs_->empty()) {
53
+ if (op_ == AND)
54
+ op_ = ALL; // AND of nothing is true
55
+ else
56
+ op_ = NONE; // OR of nothing is false
57
+
58
+ return this;
59
+ }
60
+
61
+ // Just one subnode: throw away wrapper.
62
+ if (subs_->size() == 1) {
63
+ Prefilter* a = (*subs_)[0];
64
+ subs_->clear();
65
+ delete this;
66
+ return a->Simplify();
67
+ }
68
+
69
+ return this;
70
+ }
71
+
72
+ // Combines two Prefilters together to create an "op" (AND or OR).
73
+ // The passed Prefilters will be part of the returned Prefilter or deleted.
74
+ // Does lots of work to avoid creating unnecessarily complicated structures.
75
+ Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) {
76
+ // If a, b can be rewritten as op, do so.
77
+ a = a->Simplify();
78
+ b = b->Simplify();
79
+
80
+ // Canonicalize: a->op <= b->op.
81
+ if (a->op() > b->op()) {
82
+ Prefilter* t = a;
83
+ a = b;
84
+ b = t;
85
+ }
86
+
87
+ // Trivial cases.
88
+ // ALL AND b = b
89
+ // NONE OR b = b
90
+ // ALL OR b = ALL
91
+ // NONE AND b = NONE
92
+ // Don't need to look at b, because of canonicalization above.
93
+ // ALL and NONE are smallest opcodes.
94
+ if (a->op() == ALL || a->op() == NONE) {
95
+ if ((a->op() == ALL && op == AND) ||
96
+ (a->op() == NONE && op == OR)) {
97
+ delete a;
98
+ return b;
99
+ } else {
100
+ delete b;
101
+ return a;
102
+ }
103
+ }
104
+
105
+ // If a and b match op, merge their contents.
106
+ if (a->op() == op && b->op() == op) {
107
+ for (size_t i = 0; i < b->subs()->size(); i++) {
108
+ Prefilter* bb = (*b->subs())[i];
109
+ a->subs()->push_back(bb);
110
+ }
111
+ b->subs()->clear();
112
+ delete b;
113
+ return a;
114
+ }
115
+
116
+ // If a already has the same op as the op that is under construction
117
+ // add in b (similarly if b already has the same op, add in a).
118
+ if (b->op() == op) {
119
+ Prefilter* t = a;
120
+ a = b;
121
+ b = t;
122
+ }
123
+ if (a->op() == op) {
124
+ a->subs()->push_back(b);
125
+ return a;
126
+ }
127
+
128
+ // Otherwise just return the op.
129
+ Prefilter* c = new Prefilter(op);
130
+ c->subs()->push_back(a);
131
+ c->subs()->push_back(b);
132
+ return c;
133
+ }
134
+
135
+ Prefilter* Prefilter::And(Prefilter* a, Prefilter* b) {
136
+ return AndOr(AND, a, b);
137
+ }
138
+
139
+ Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {
140
+ return AndOr(OR, a, b);
141
+ }
142
+
143
// Removes redundant strings from *ss.
//
// If "ab" is a required string, then knowing that "abc" is also required
// adds nothing: when string search is used to filter regexps, a hit on
// "ab" already makes the regexp a candidate, so "abc" is deleted.
//
// The empty string is never used as a pattern, because find() would
// locate it at the start of every string and everything would be erased.
//
// The set is ordered lexicographically, so a superstring may sort either
// before or after the string it contains ("abc" < "bc"). The whole set is
// therefore scanned for each pattern, not just the elements that follow.
static void SimplifyStringSet(std::set<std::string>* ss) {
  typedef std::set<std::string>::iterator Iter;
  for (Iter i = ss->begin(); i != ss->end(); ++i) {
    if (i->empty())
      continue;
    Iter j = ss->begin();
    while (j != ss->end()) {
      // Elements are unique, so a different string containing *i must be
      // strictly longer. The size test both skips *i itself (keeping the
      // outer iterator valid) and avoids pointless find() calls.
      if (j->size() > i->size() && j->find(*i) != std::string::npos) {
        j = ss->erase(j);
        continue;
      }
      ++j;
    }
  }
}
166
+
167
+ Prefilter* Prefilter::OrStrings(std::set<std::string>* ss) {
168
+ Prefilter* or_prefilter = new Prefilter(NONE);
169
+ SimplifyStringSet(ss);
170
+ for (SSIter i = ss->begin(); i != ss->end(); ++i)
171
+ or_prefilter = Or(or_prefilter, FromString(*i));
172
+ return or_prefilter;
173
+ }
174
+
175
+ static Rune ToLowerRune(Rune r) {
176
+ if (r < Runeself) {
177
+ if ('A' <= r && r <= 'Z')
178
+ r += 'a' - 'A';
179
+ return r;
180
+ }
181
+
182
+ const CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r);
183
+ if (f == NULL || r < f->lo)
184
+ return r;
185
+ return ApplyFold(f, r);
186
+ }
187
+
188
+ static Rune ToLowerRuneLatin1(Rune r) {
189
+ if ('A' <= r && r <= 'Z')
190
+ r += 'a' - 'A';
191
+ return r;
192
+ }
193
+
194
+ Prefilter* Prefilter::FromString(const std::string& str) {
195
+ Prefilter* m = new Prefilter(Prefilter::ATOM);
196
+ m->atom_ = str;
197
+ return m;
198
+ }
199
+
200
+ // Information about a regexp used during computation of Prefilter.
201
+ // Can be thought of as information about the set of strings matching
202
+ // the given regular expression.
203
+ class Prefilter::Info {
204
+ public:
205
+ Info();
206
+ ~Info();
207
+
208
+ // More constructors. They delete their Info* arguments.
209
+ static Info* Alt(Info* a, Info* b);
210
+ static Info* Concat(Info* a, Info* b);
211
+ static Info* And(Info* a, Info* b);
212
+ static Info* Star(Info* a);
213
+ static Info* Plus(Info* a);
214
+ static Info* Quest(Info* a);
215
+ static Info* EmptyString();
216
+ static Info* NoMatch();
217
+ static Info* AnyCharOrAnyByte();
218
+ static Info* CClass(CharClass* cc, bool latin1);
219
+ static Info* Literal(Rune r);
220
+ static Info* LiteralLatin1(Rune r);
221
+ static Info* AnyMatch();
222
+
223
+ // Format Info as a string.
224
+ std::string ToString();
225
+
226
+ // Caller takes ownership of the Prefilter.
227
+ Prefilter* TakeMatch();
228
+
229
+ std::set<std::string>& exact() { return exact_; }
230
+
231
+ bool is_exact() const { return is_exact_; }
232
+
233
+ class Walker;
234
+
235
+ private:
236
+ std::set<std::string> exact_;
237
+
238
+ // When is_exact_ is true, the strings that match
239
+ // are placed in exact_. When it is no longer an exact
240
+ // set of strings that match this RE, then is_exact_
241
+ // is false and the match_ contains the required match
242
+ // criteria.
243
+ bool is_exact_;
244
+
245
+ // Accumulated Prefilter query that any
246
+ // match for this regexp is guaranteed to match.
247
+ Prefilter* match_;
248
+ };
249
+
250
+
251
+ Prefilter::Info::Info()
252
+ : is_exact_(false),
253
+ match_(NULL) {
254
+ }
255
+
256
+ Prefilter::Info::~Info() {
257
+ delete match_;
258
+ }
259
+
260
+ Prefilter* Prefilter::Info::TakeMatch() {
261
+ if (is_exact_) {
262
+ match_ = Prefilter::OrStrings(&exact_);
263
+ is_exact_ = false;
264
+ }
265
+ Prefilter* m = match_;
266
+ match_ = NULL;
267
+ return m;
268
+ }
269
+
270
+ // Format a Info in string form.
271
+ std::string Prefilter::Info::ToString() {
272
+ if (is_exact_) {
273
+ int n = 0;
274
+ std::string s;
275
+ for (SSIter i = exact_.begin(); i != exact_.end(); ++i) {
276
+ if (n++ > 0)
277
+ s += ",";
278
+ s += *i;
279
+ }
280
+ return s;
281
+ }
282
+
283
+ if (match_)
284
+ return match_->DebugString();
285
+
286
+ return "";
287
+ }
288
+
289
// Inserts every string of src into *dst; strings already in *dst stay.
static void CopyIn(const std::set<std::string>& src,
                   std::set<std::string>* dst) {
  dst->insert(src.begin(), src.end());
}
295
+
296
// Inserts into *dst the concatenation x+y for every x in a and y in b.
// If either input set is empty, nothing is inserted.
static void CrossProduct(const std::set<std::string>& a,
                         const std::set<std::string>& b,
                         std::set<std::string>* dst) {
  for (const std::string& head : a) {
    for (const std::string& tail : b) {
      dst->insert(head + tail);
    }
  }
}
305
+
306
+ // Concats a and b. Requires that both are exact sets.
307
+ // Forms an exact set that is a crossproduct of a and b.
308
+ Prefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) {
309
+ if (a == NULL)
310
+ return b;
311
+ DCHECK(a->is_exact_);
312
+ DCHECK(b && b->is_exact_);
313
+ Info *ab = new Info();
314
+
315
+ CrossProduct(a->exact_, b->exact_, &ab->exact_);
316
+ ab->is_exact_ = true;
317
+
318
+ delete a;
319
+ delete b;
320
+ return ab;
321
+ }
322
+
323
+ // Constructs an inexact Info for ab given a and b.
324
+ // Used only when a or b is not exact or when the
325
+ // exact cross product is likely to be too big.
326
+ Prefilter::Info* Prefilter::Info::And(Info* a, Info* b) {
327
+ if (a == NULL)
328
+ return b;
329
+ if (b == NULL)
330
+ return a;
331
+
332
+ Info *ab = new Info();
333
+
334
+ ab->match_ = Prefilter::And(a->TakeMatch(), b->TakeMatch());
335
+ ab->is_exact_ = false;
336
+ delete a;
337
+ delete b;
338
+ return ab;
339
+ }
340
+
341
+ // Constructs Info for a|b given a and b.
342
+ Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) {
343
+ Info *ab = new Info();
344
+
345
+ if (a->is_exact_ && b->is_exact_) {
346
+ CopyIn(a->exact_, &ab->exact_);
347
+ CopyIn(b->exact_, &ab->exact_);
348
+ ab->is_exact_ = true;
349
+ } else {
350
+ // Either a or b has is_exact_ = false. If the other
351
+ // one has is_exact_ = true, we move it to match_ and
352
+ // then create a OR of a,b. The resulting Info has
353
+ // is_exact_ = false.
354
+ ab->match_ = Prefilter::Or(a->TakeMatch(), b->TakeMatch());
355
+ ab->is_exact_ = false;
356
+ }
357
+
358
+ delete a;
359
+ delete b;
360
+ return ab;
361
+ }
362
+
363
+ // Constructs Info for a? given a.
364
+ Prefilter::Info* Prefilter::Info::Quest(Info *a) {
365
+ Info *ab = new Info();
366
+
367
+ ab->is_exact_ = false;
368
+ ab->match_ = new Prefilter(ALL);
369
+ delete a;
370
+ return ab;
371
+ }
372
+
373
+ // Constructs Info for a* given a.
374
+ // Same as a? -- not much to do.
375
+ Prefilter::Info* Prefilter::Info::Star(Info *a) {
376
+ return Quest(a);
377
+ }
378
+
379
+ // Constructs Info for a+ given a. If a was exact set, it isn't
380
+ // anymore.
381
+ Prefilter::Info* Prefilter::Info::Plus(Info *a) {
382
+ Info *ab = new Info();
383
+
384
+ ab->match_ = a->TakeMatch();
385
+ ab->is_exact_ = false;
386
+
387
+ delete a;
388
+ return ab;
389
+ }
390
+
391
+ static std::string RuneToString(Rune r) {
392
+ char buf[UTFmax];
393
+ int n = runetochar(buf, &r);
394
+ return std::string(buf, n);
395
+ }
396
+
397
+ static std::string RuneToStringLatin1(Rune r) {
398
+ char c = r & 0xff;
399
+ return std::string(&c, 1);
400
+ }
401
+
402
+ // Constructs Info for literal rune.
403
+ Prefilter::Info* Prefilter::Info::Literal(Rune r) {
404
+ Info* info = new Info();
405
+ info->exact_.insert(RuneToString(ToLowerRune(r)));
406
+ info->is_exact_ = true;
407
+ return info;
408
+ }
409
+
410
+ // Constructs Info for literal rune for Latin1 encoded string.
411
+ Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) {
412
+ Info* info = new Info();
413
+ info->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));
414
+ info->is_exact_ = true;
415
+ return info;
416
+ }
417
+
418
+ // Constructs Info for dot (any character) or \C (any byte).
419
+ Prefilter::Info* Prefilter::Info::AnyCharOrAnyByte() {
420
+ Prefilter::Info* info = new Prefilter::Info();
421
+ info->match_ = new Prefilter(ALL);
422
+ return info;
423
+ }
424
+
425
+ // Constructs Prefilter::Info for no possible match.
426
+ Prefilter::Info* Prefilter::Info::NoMatch() {
427
+ Prefilter::Info* info = new Prefilter::Info();
428
+ info->match_ = new Prefilter(NONE);
429
+ return info;
430
+ }
431
+
432
+ // Constructs Prefilter::Info for any possible match.
433
+ // This Prefilter::Info is valid for any regular expression,
434
+ // since it makes no assertions whatsoever about the
435
+ // strings being matched.
436
+ Prefilter::Info* Prefilter::Info::AnyMatch() {
437
+ Prefilter::Info *info = new Prefilter::Info();
438
+ info->match_ = new Prefilter(ALL);
439
+ return info;
440
+ }
441
+
442
+ // Constructs Prefilter::Info for just the empty string.
443
+ Prefilter::Info* Prefilter::Info::EmptyString() {
444
+ Prefilter::Info* info = new Prefilter::Info();
445
+ info->is_exact_ = true;
446
+ info->exact_.insert("");
447
+ return info;
448
+ }
449
+
450
+ // Constructs Prefilter::Info for a character class.
451
+ typedef CharClass::iterator CCIter;
452
+ Prefilter::Info* Prefilter::Info::CClass(CharClass *cc,
453
+ bool latin1) {
454
+ if (ExtraDebug) {
455
+ LOG(ERROR) << "CharClassInfo:";
456
+ for (CCIter i = cc->begin(); i != cc->end(); ++i)
457
+ LOG(ERROR) << " " << i->lo << "-" << i->hi;
458
+ }
459
+
460
+ // If the class is too large, it's okay to overestimate.
461
+ if (cc->size() > 10)
462
+ return AnyCharOrAnyByte();
463
+
464
+ Prefilter::Info *a = new Prefilter::Info();
465
+ for (CCIter i = cc->begin(); i != cc->end(); ++i)
466
+ for (Rune r = i->lo; r <= i->hi; r++) {
467
+ if (latin1) {
468
+ a->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r)));
469
+ } else {
470
+ a->exact_.insert(RuneToString(ToLowerRune(r)));
471
+ }
472
+ }
473
+
474
+
475
+ a->is_exact_ = true;
476
+
477
+ if (ExtraDebug)
478
+ LOG(ERROR) << " = " << a->ToString();
479
+
480
+ return a;
481
+ }
482
+
483
+ class Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {
484
+ public:
485
+ Walker(bool latin1) : latin1_(latin1) {}
486
+
487
+ virtual Info* PostVisit(
488
+ Regexp* re, Info* parent_arg,
489
+ Info* pre_arg,
490
+ Info** child_args, int nchild_args);
491
+
492
+ virtual Info* ShortVisit(
493
+ Regexp* re,
494
+ Info* parent_arg);
495
+
496
+ bool latin1() { return latin1_; }
497
+ private:
498
+ bool latin1_;
499
+
500
+ Walker(const Walker&) = delete;
501
+ Walker& operator=(const Walker&) = delete;
502
+ };
503
+
504
+ Prefilter::Info* Prefilter::BuildInfo(Regexp* re) {
505
+ if (ExtraDebug)
506
+ LOG(ERROR) << "BuildPrefilter::Info: " << re->ToString();
507
+
508
+ bool latin1 = (re->parse_flags() & Regexp::Latin1) != 0;
509
+ Prefilter::Info::Walker w(latin1);
510
+ Prefilter::Info* info = w.WalkExponential(re, NULL, 100000);
511
+
512
+ if (w.stopped_early()) {
513
+ delete info;
514
+ return NULL;
515
+ }
516
+
517
+ return info;
518
+ }
519
+
520
+ Prefilter::Info* Prefilter::Info::Walker::ShortVisit(
521
+ Regexp* re, Prefilter::Info* parent_arg) {
522
+ return AnyMatch();
523
+ }
524
+
525
+ // Constructs the Prefilter::Info for the given regular expression.
526
+ // Assumes re is simplified.
527
+ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
528
+ Regexp* re, Prefilter::Info* parent_arg,
529
+ Prefilter::Info* pre_arg, Prefilter::Info** child_args,
530
+ int nchild_args) {
531
+ Prefilter::Info *info;
532
+ switch (re->op()) {
533
+ default:
534
+ case kRegexpRepeat:
535
+ LOG(DFATAL) << "Bad regexp op " << re->op();
536
+ info = EmptyString();
537
+ break;
538
+
539
+ case kRegexpNoMatch:
540
+ info = NoMatch();
541
+ break;
542
+
543
+ // These ops match the empty string:
544
+ case kRegexpEmptyMatch: // anywhere
545
+ case kRegexpBeginLine: // at beginning of line
546
+ case kRegexpEndLine: // at end of line
547
+ case kRegexpBeginText: // at beginning of text
548
+ case kRegexpEndText: // at end of text
549
+ case kRegexpWordBoundary: // at word boundary
550
+ case kRegexpNoWordBoundary: // not at word boundary
551
+ info = EmptyString();
552
+ break;
553
+
554
+ case kRegexpLiteral:
555
+ if (latin1()) {
556
+ info = LiteralLatin1(re->rune());
557
+ }
558
+ else {
559
+ info = Literal(re->rune());
560
+ }
561
+ break;
562
+
563
+ case kRegexpLiteralString:
564
+ if (re->nrunes() == 0) {
565
+ info = NoMatch();
566
+ break;
567
+ }
568
+ if (latin1()) {
569
+ info = LiteralLatin1(re->runes()[0]);
570
+ for (int i = 1; i < re->nrunes(); i++) {
571
+ info = Concat(info, LiteralLatin1(re->runes()[i]));
572
+ }
573
+ } else {
574
+ info = Literal(re->runes()[0]);
575
+ for (int i = 1; i < re->nrunes(); i++) {
576
+ info = Concat(info, Literal(re->runes()[i]));
577
+ }
578
+ }
579
+ break;
580
+
581
+ case kRegexpConcat: {
582
+ // Accumulate in info.
583
+ // Exact is concat of recent contiguous exact nodes.
584
+ info = NULL;
585
+ Info* exact = NULL;
586
+ for (int i = 0; i < nchild_args; i++) {
587
+ Info* ci = child_args[i]; // child info
588
+ if (!ci->is_exact() ||
589
+ (exact && ci->exact().size() * exact->exact().size() > 16)) {
590
+ // Exact run is over.
591
+ info = And(info, exact);
592
+ exact = NULL;
593
+ // Add this child's info.
594
+ info = And(info, ci);
595
+ } else {
596
+ // Append to exact run.
597
+ exact = Concat(exact, ci);
598
+ }
599
+ }
600
+ info = And(info, exact);
601
+ }
602
+ break;
603
+
604
+ case kRegexpAlternate:
605
+ info = child_args[0];
606
+ for (int i = 1; i < nchild_args; i++)
607
+ info = Alt(info, child_args[i]);
608
+ break;
609
+
610
+ case kRegexpStar:
611
+ info = Star(child_args[0]);
612
+ break;
613
+
614
+ case kRegexpQuest:
615
+ info = Quest(child_args[0]);
616
+ break;
617
+
618
+ case kRegexpPlus:
619
+ info = Plus(child_args[0]);
620
+ break;
621
+
622
+ case kRegexpAnyChar:
623
+ case kRegexpAnyByte:
624
+ // Claim nothing, except that it's not empty.
625
+ info = AnyCharOrAnyByte();
626
+ break;
627
+
628
+ case kRegexpCharClass:
629
+ info = CClass(re->cc(), latin1());
630
+ break;
631
+
632
+ case kRegexpCapture:
633
+ // These don't affect the set of matching strings.
634
+ info = child_args[0];
635
+ break;
636
+ }
637
+
638
+ if (ExtraDebug)
639
+ LOG(ERROR) << "BuildInfo " << re->ToString()
640
+ << ": " << (info ? info->ToString() : "");
641
+
642
+ return info;
643
+ }
644
+
645
+
646
+ Prefilter* Prefilter::FromRegexp(Regexp* re) {
647
+ if (re == NULL)
648
+ return NULL;
649
+
650
+ Regexp* simple = re->Simplify();
651
+ Prefilter::Info *info = BuildInfo(simple);
652
+
653
+ simple->Decref();
654
+ if (info == NULL)
655
+ return NULL;
656
+
657
+ Prefilter* m = info->TakeMatch();
658
+
659
+ delete info;
660
+ return m;
661
+ }
662
+
663
+ std::string Prefilter::DebugString() const {
664
+ switch (op_) {
665
+ default:
666
+ LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
667
+ return StringPrintf("op%d", op_);
668
+ case NONE:
669
+ return "*no-matches*";
670
+ case ATOM:
671
+ return atom_;
672
+ case ALL:
673
+ return "";
674
+ case AND: {
675
+ std::string s = "";
676
+ for (size_t i = 0; i < subs_->size(); i++) {
677
+ if (i > 0)
678
+ s += " ";
679
+ Prefilter* sub = (*subs_)[i];
680
+ s += sub ? sub->DebugString() : "<nil>";
681
+ }
682
+ return s;
683
+ }
684
+ case OR: {
685
+ std::string s = "(";
686
+ for (size_t i = 0; i < subs_->size(); i++) {
687
+ if (i > 0)
688
+ s += "|";
689
+ Prefilter* sub = (*subs_)[i];
690
+ s += sub ? sub->DebugString() : "<nil>";
691
+ }
692
+ s += ")";
693
+ return s;
694
+ }
695
+ }
696
+ }
697
+
698
+ Prefilter* Prefilter::FromRE2(const RE2* re2) {
699
+ if (re2 == NULL)
700
+ return NULL;
701
+
702
+ Regexp* regexp = re2->Regexp();
703
+ if (regexp == NULL)
704
+ return NULL;
705
+
706
+ return FromRegexp(regexp);
707
+ }
708
+
709
+
710
+ } // namespace re2