grpc 1.31.0.pre1 → 1.31.0.pre2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of grpc might be problematic. Click here for more details.

Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/Makefile +2 -2
  3. data/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc +3 -4
  4. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_routing.cc +5 -4
  5. data/src/ruby/lib/grpc/version.rb +1 -1
  6. data/third_party/re2/re2/bitmap256.h +117 -0
  7. data/third_party/re2/re2/bitstate.cc +385 -0
  8. data/third_party/re2/re2/compile.cc +1279 -0
  9. data/third_party/re2/re2/dfa.cc +2130 -0
  10. data/third_party/re2/re2/filtered_re2.cc +121 -0
  11. data/third_party/re2/re2/filtered_re2.h +109 -0
  12. data/third_party/re2/re2/mimics_pcre.cc +197 -0
  13. data/third_party/re2/re2/nfa.cc +713 -0
  14. data/third_party/re2/re2/onepass.cc +623 -0
  15. data/third_party/re2/re2/parse.cc +2464 -0
  16. data/third_party/re2/re2/perl_groups.cc +119 -0
  17. data/third_party/re2/re2/pod_array.h +55 -0
  18. data/third_party/re2/re2/prefilter.cc +710 -0
  19. data/third_party/re2/re2/prefilter.h +108 -0
  20. data/third_party/re2/re2/prefilter_tree.cc +407 -0
  21. data/third_party/re2/re2/prefilter_tree.h +139 -0
  22. data/third_party/re2/re2/prog.cc +988 -0
  23. data/third_party/re2/re2/prog.h +436 -0
  24. data/third_party/re2/re2/re2.cc +1362 -0
  25. data/third_party/re2/re2/re2.h +1002 -0
  26. data/third_party/re2/re2/regexp.cc +980 -0
  27. data/third_party/re2/re2/regexp.h +659 -0
  28. data/third_party/re2/re2/set.cc +154 -0
  29. data/third_party/re2/re2/set.h +80 -0
  30. data/third_party/re2/re2/simplify.cc +657 -0
  31. data/third_party/re2/re2/sparse_array.h +392 -0
  32. data/third_party/re2/re2/sparse_set.h +264 -0
  33. data/third_party/re2/re2/stringpiece.cc +65 -0
  34. data/third_party/re2/re2/stringpiece.h +210 -0
  35. data/third_party/re2/re2/tostring.cc +351 -0
  36. data/third_party/re2/re2/unicode_casefold.cc +582 -0
  37. data/third_party/re2/re2/unicode_casefold.h +78 -0
  38. data/third_party/re2/re2/unicode_groups.cc +6269 -0
  39. data/third_party/re2/re2/unicode_groups.h +67 -0
  40. data/third_party/re2/re2/walker-inl.h +246 -0
  41. data/third_party/re2/util/benchmark.h +156 -0
  42. data/third_party/re2/util/flags.h +26 -0
  43. data/third_party/re2/util/logging.h +109 -0
  44. data/third_party/re2/util/malloc_counter.h +19 -0
  45. data/third_party/re2/util/mix.h +41 -0
  46. data/third_party/re2/util/mutex.h +148 -0
  47. data/third_party/re2/util/pcre.cc +1025 -0
  48. data/third_party/re2/util/pcre.h +681 -0
  49. data/third_party/re2/util/rune.cc +260 -0
  50. data/third_party/re2/util/strutil.cc +149 -0
  51. data/third_party/re2/util/strutil.h +21 -0
  52. data/third_party/re2/util/test.h +50 -0
  53. data/third_party/re2/util/utf.h +44 -0
  54. data/third_party/re2/util/util.h +42 -0
  55. metadata +78 -29
@@ -0,0 +1,154 @@
1
+ // Copyright 2010 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #include "re2/set.h"
6
+
7
+ #include <stddef.h>
8
+ #include <algorithm>
9
+ #include <memory>
10
+
11
+ #include "util/util.h"
12
+ #include "util/logging.h"
13
+ #include "re2/pod_array.h"
14
+ #include "re2/prog.h"
15
+ #include "re2/re2.h"
16
+ #include "re2/regexp.h"
17
+ #include "re2/stringpiece.h"
18
+
19
+ namespace re2 {
20
+
21
+ RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) {
22
+ options_.Copy(options);
23
+ options_.set_never_capture(true); // might unblock some optimisations
24
+ anchor_ = anchor;
25
+ prog_ = NULL;
26
+ compiled_ = false;
27
+ size_ = 0;
28
+ }
29
+
30
+ RE2::Set::~Set() {
31
+ for (size_t i = 0; i < elem_.size(); i++)
32
+ elem_[i].second->Decref();
33
+ delete prog_;
34
+ }
35
+
36
+ int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
37
+ if (compiled_) {
38
+ LOG(DFATAL) << "RE2::Set::Add() called after compiling";
39
+ return -1;
40
+ }
41
+
42
+ Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
43
+ options_.ParseFlags());
44
+ RegexpStatus status;
45
+ re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
46
+ if (re == NULL) {
47
+ if (error != NULL)
48
+ *error = status.Text();
49
+ if (options_.log_errors())
50
+ LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
51
+ return -1;
52
+ }
53
+
54
+ // Concatenate with match index and push on vector.
55
+ int n = static_cast<int>(elem_.size());
56
+ re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
57
+ if (re->op() == kRegexpConcat) {
58
+ int nsub = re->nsub();
59
+ PODArray<re2::Regexp*> sub(nsub + 1);
60
+ for (int i = 0; i < nsub; i++)
61
+ sub[i] = re->sub()[i]->Incref();
62
+ sub[nsub] = m;
63
+ re->Decref();
64
+ re = re2::Regexp::Concat(sub.data(), nsub + 1, pf);
65
+ } else {
66
+ re2::Regexp* sub[2];
67
+ sub[0] = re;
68
+ sub[1] = m;
69
+ re = re2::Regexp::Concat(sub, 2, pf);
70
+ }
71
+ elem_.emplace_back(std::string(pattern), re);
72
+ return n;
73
+ }
74
+
75
+ bool RE2::Set::Compile() {
76
+ if (compiled_) {
77
+ LOG(DFATAL) << "RE2::Set::Compile() called more than once";
78
+ return false;
79
+ }
80
+ compiled_ = true;
81
+ size_ = static_cast<int>(elem_.size());
82
+
83
+ // Sort the elements by their patterns. This is good enough for now
84
+ // until we have a Regexp comparison function. (Maybe someday...)
85
+ std::sort(elem_.begin(), elem_.end(),
86
+ [](const Elem& a, const Elem& b) -> bool {
87
+ return a.first < b.first;
88
+ });
89
+
90
+ PODArray<re2::Regexp*> sub(size_);
91
+ for (int i = 0; i < size_; i++)
92
+ sub[i] = elem_[i].second;
93
+ elem_.clear();
94
+ elem_.shrink_to_fit();
95
+
96
+ Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
97
+ options_.ParseFlags());
98
+ re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
99
+
100
+ prog_ = Prog::CompileSet(re, anchor_, options_.max_mem());
101
+ re->Decref();
102
+ return prog_ != NULL;
103
+ }
104
+
105
+ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
106
+ return Match(text, v, NULL);
107
+ }
108
+
109
+ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
110
+ ErrorInfo* error_info) const {
111
+ if (!compiled_) {
112
+ LOG(DFATAL) << "RE2::Set::Match() called before compiling";
113
+ if (error_info != NULL)
114
+ error_info->kind = kNotCompiled;
115
+ return false;
116
+ }
117
+ bool dfa_failed = false;
118
+ std::unique_ptr<SparseSet> matches;
119
+ if (v != NULL) {
120
+ matches.reset(new SparseSet(size_));
121
+ v->clear();
122
+ }
123
+ bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
124
+ NULL, &dfa_failed, matches.get());
125
+ if (dfa_failed) {
126
+ if (options_.log_errors())
127
+ LOG(ERROR) << "DFA out of memory: "
128
+ << "program size " << prog_->size() << ", "
129
+ << "list count " << prog_->list_count() << ", "
130
+ << "bytemap range " << prog_->bytemap_range();
131
+ if (error_info != NULL)
132
+ error_info->kind = kOutOfMemory;
133
+ return false;
134
+ }
135
+ if (ret == false) {
136
+ if (error_info != NULL)
137
+ error_info->kind = kNoError;
138
+ return false;
139
+ }
140
+ if (v != NULL) {
141
+ if (matches->empty()) {
142
+ LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
143
+ if (error_info != NULL)
144
+ error_info->kind = kInconsistent;
145
+ return false;
146
+ }
147
+ v->assign(matches->begin(), matches->end());
148
+ }
149
+ if (error_info != NULL)
150
+ error_info->kind = kNoError;
151
+ return true;
152
+ }
153
+
154
+ } // namespace re2
@@ -0,0 +1,80 @@
1
+ // Copyright 2010 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #ifndef RE2_SET_H_
6
+ #define RE2_SET_H_
7
+
8
+ #include <string>
9
+ #include <utility>
10
+ #include <vector>
11
+
12
+ #include "re2/re2.h"
13
+
14
+ namespace re2 {
15
+ class Prog;
16
+ class Regexp;
17
+ } // namespace re2
18
+
19
+ namespace re2 {
20
+
21
+ // An RE2::Set represents a collection of regexps that can
22
+ // be searched for simultaneously.
23
+ class RE2::Set {
24
+ public:
25
+ enum ErrorKind {
26
+ kNoError = 0,
27
+ kNotCompiled, // The set is not compiled.
28
+ kOutOfMemory, // The DFA ran out of memory.
29
+ kInconsistent, // The result is inconsistent. This should never happen.
30
+ };
31
+
32
+ struct ErrorInfo {
33
+ ErrorKind kind;
34
+ };
35
+
36
+ Set(const RE2::Options& options, RE2::Anchor anchor);
37
+ ~Set();
38
+
39
+ // Adds pattern to the set using the options passed to the constructor.
40
+ // Returns the index that will identify the regexp in the output of Match(),
41
+ // or -1 if the regexp cannot be parsed.
42
+ // Indices are assigned in sequential order starting from 0.
43
+ // Errors do not increment the index; if error is not NULL, *error will hold
44
+ // the error message from the parser.
45
+ int Add(const StringPiece& pattern, std::string* error);
46
+
47
+ // Compiles the set in preparation for matching.
48
+ // Returns false if the compiler runs out of memory.
49
+ // Add() must not be called again after Compile().
50
+ // Compile() must be called before Match().
51
+ bool Compile();
52
+
53
+ // Returns true if text matches at least one of the regexps in the set.
54
+ // Fills v (if not NULL) with the indices of the matching regexps.
55
+ // Callers must not expect v to be sorted.
56
+ bool Match(const StringPiece& text, std::vector<int>* v) const;
57
+
58
+ // As above, but populates error_info (if not NULL) when none of the regexps
59
+ // in the set matched. This can inform callers when DFA execution fails, for
60
+ // example, because they might wish to handle that case differently.
61
+ bool Match(const StringPiece& text, std::vector<int>* v,
62
+ ErrorInfo* error_info) const;
63
+
64
+ private:
65
+ typedef std::pair<std::string, re2::Regexp*> Elem;
66
+
67
+ RE2::Options options_;
68
+ RE2::Anchor anchor_;
69
+ std::vector<Elem> elem_;
70
+ re2::Prog* prog_;
71
+ bool compiled_;
72
+ int size_;
73
+
74
+ Set(const Set&) = delete;
75
+ Set& operator=(const Set&) = delete;
76
+ };
77
+
78
+ } // namespace re2
79
+
80
+ #endif // RE2_SET_H_
@@ -0,0 +1,657 @@
1
+ // Copyright 2006 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ // Rewrite POSIX and other features in re
6
+ // to use simple extended regular expression features.
7
+ // Also sort and simplify character classes.
8
+
9
+ #include <string>
10
+
11
+ #include "util/util.h"
12
+ #include "util/logging.h"
13
+ #include "util/utf.h"
14
+ #include "re2/pod_array.h"
15
+ #include "re2/regexp.h"
16
+ #include "re2/walker-inl.h"
17
+
18
+ namespace re2 {
19
+
20
+ // Parses the regexp src and then simplifies it and sets *dst to the
21
+ // string representation of the simplified form. Returns true on success.
22
+ // Returns false and sets *error (if error != NULL) on error.
23
+ bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags,
24
+ std::string* dst, RegexpStatus* status) {
25
+ Regexp* re = Parse(src, flags, status);
26
+ if (re == NULL)
27
+ return false;
28
+ Regexp* sre = re->Simplify();
29
+ re->Decref();
30
+ if (sre == NULL) {
31
+ // Should not happen, since Simplify never fails.
32
+ LOG(ERROR) << "Simplify failed on " << src;
33
+ if (status) {
34
+ status->set_code(kRegexpInternalError);
35
+ status->set_error_arg(src);
36
+ }
37
+ return false;
38
+ }
39
+ *dst = sre->ToString();
40
+ sre->Decref();
41
+ return true;
42
+ }
43
+
44
+ // Assuming the simple_ flags on the children are accurate,
45
+ // is this Regexp* simple?
46
+ bool Regexp::ComputeSimple() {
47
+ Regexp** subs;
48
+ switch (op_) {
49
+ case kRegexpNoMatch:
50
+ case kRegexpEmptyMatch:
51
+ case kRegexpLiteral:
52
+ case kRegexpLiteralString:
53
+ case kRegexpBeginLine:
54
+ case kRegexpEndLine:
55
+ case kRegexpBeginText:
56
+ case kRegexpWordBoundary:
57
+ case kRegexpNoWordBoundary:
58
+ case kRegexpEndText:
59
+ case kRegexpAnyChar:
60
+ case kRegexpAnyByte:
61
+ case kRegexpHaveMatch:
62
+ return true;
63
+ case kRegexpConcat:
64
+ case kRegexpAlternate:
65
+ // These are simple as long as the subpieces are simple.
66
+ subs = sub();
67
+ for (int i = 0; i < nsub_; i++)
68
+ if (!subs[i]->simple())
69
+ return false;
70
+ return true;
71
+ case kRegexpCharClass:
72
+ // Simple as long as the char class is not empty, not full.
73
+ if (ccb_ != NULL)
74
+ return !ccb_->empty() && !ccb_->full();
75
+ return !cc_->empty() && !cc_->full();
76
+ case kRegexpCapture:
77
+ subs = sub();
78
+ return subs[0]->simple();
79
+ case kRegexpStar:
80
+ case kRegexpPlus:
81
+ case kRegexpQuest:
82
+ subs = sub();
83
+ if (!subs[0]->simple())
84
+ return false;
85
+ switch (subs[0]->op_) {
86
+ case kRegexpStar:
87
+ case kRegexpPlus:
88
+ case kRegexpQuest:
89
+ case kRegexpEmptyMatch:
90
+ case kRegexpNoMatch:
91
+ return false;
92
+ default:
93
+ break;
94
+ }
95
+ return true;
96
+ case kRegexpRepeat:
97
+ return false;
98
+ }
99
+ LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;
100
+ return false;
101
+ }
102
+
103
+ // Walker subclass used by Simplify.
104
+ // Coalesces runs of star/plus/quest/repeat of the same literal along with any
105
+ // occurrences of that literal into repeats of that literal. It also works for
106
+ // char classes, any char and any byte.
107
+ // PostVisit creates the coalesced result, which should then be simplified.
108
+ class CoalesceWalker : public Regexp::Walker<Regexp*> {
109
+ public:
110
+ CoalesceWalker() {}
111
+ virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
112
+ Regexp** child_args, int nchild_args);
113
+ virtual Regexp* Copy(Regexp* re);
114
+ virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
115
+
116
+ private:
117
+ // These functions are declared inside CoalesceWalker so that
118
+ // they can edit the private fields of the Regexps they construct.
119
+
120
+ // Returns true if r1 and r2 can be coalesced. In particular, ensures that
121
+ // the parse flags are consistent. (They will not be checked again later.)
122
+ static bool CanCoalesce(Regexp* r1, Regexp* r2);
123
+
124
+ // Coalesces *r1ptr and *r2ptr. In most cases, the array elements afterwards
125
+ // will be empty match and the coalesced op. In other cases, where part of a
126
+ // literal string was removed to be coalesced, the array elements afterwards
127
+ // will be the coalesced op and the remainder of the literal string.
128
+ static void DoCoalesce(Regexp** r1ptr, Regexp** r2ptr);
129
+
130
+ CoalesceWalker(const CoalesceWalker&) = delete;
131
+ CoalesceWalker& operator=(const CoalesceWalker&) = delete;
132
+ };
133
+
134
+ // Walker subclass used by Simplify.
135
+ // The simplify walk is purely post-recursive: given the simplified children,
136
+ // PostVisit creates the simplified result.
137
+ // The child_args are simplified Regexp*s.
138
+ class SimplifyWalker : public Regexp::Walker<Regexp*> {
139
+ public:
140
+ SimplifyWalker() {}
141
+ virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop);
142
+ virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
143
+ Regexp** child_args, int nchild_args);
144
+ virtual Regexp* Copy(Regexp* re);
145
+ virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
146
+
147
+ private:
148
+ // These functions are declared inside SimplifyWalker so that
149
+ // they can edit the private fields of the Regexps they construct.
150
+
151
+ // Creates a concatenation of two Regexp, consuming refs to re1 and re2.
152
+ // Caller must Decref return value when done with it.
153
+ static Regexp* Concat2(Regexp* re1, Regexp* re2, Regexp::ParseFlags flags);
154
+
155
+ // Simplifies the expression re{min,max} in terms of *, +, and ?.
156
+ // Returns a new regexp. Does not edit re. Does not consume reference to re.
157
+ // Caller must Decref return value when done with it.
158
+ static Regexp* SimplifyRepeat(Regexp* re, int min, int max,
159
+ Regexp::ParseFlags parse_flags);
160
+
161
+ // Simplifies a character class by expanding any named classes
162
+ // into rune ranges. Does not edit re. Does not consume ref to re.
163
+ // Caller must Decref return value when done with it.
164
+ static Regexp* SimplifyCharClass(Regexp* re);
165
+
166
+ SimplifyWalker(const SimplifyWalker&) = delete;
167
+ SimplifyWalker& operator=(const SimplifyWalker&) = delete;
168
+ };
169
+
170
+ // Simplifies a regular expression, returning a new regexp.
171
+ // The new regexp uses traditional Unix egrep features only,
172
+ // plus the Perl (?:) non-capturing parentheses.
173
+ // Otherwise, no POSIX or Perl additions. The new regexp
174
+ // captures exactly the same subexpressions (with the same indices)
175
+ // as the original.
176
+ // Does not edit current object.
177
+ // Caller must Decref() return value when done with it.
178
+
179
+ Regexp* Regexp::Simplify() {
180
+ CoalesceWalker cw;
181
+ Regexp* cre = cw.Walk(this, NULL);
182
+ if (cre == NULL)
183
+ return cre;
184
+ SimplifyWalker sw;
185
+ Regexp* sre = sw.Walk(cre, NULL);
186
+ cre->Decref();
187
+ return sre;
188
+ }
189
+
190
+ #define Simplify DontCallSimplify // Avoid accidental recursion
191
+
192
+ // Utility function for PostVisit implementations that compares re->sub() with
193
+ // child_args to determine whether any child_args changed. In the common case,
194
+ // where nothing changed, calls Decref() for all child_args and returns false,
195
+ // so PostVisit must return re->Incref(). Otherwise, returns true.
196
+ static bool ChildArgsChanged(Regexp* re, Regexp** child_args) {
197
+ for (int i = 0; i < re->nsub(); i++) {
198
+ Regexp* sub = re->sub()[i];
199
+ Regexp* newsub = child_args[i];
200
+ if (newsub != sub)
201
+ return true;
202
+ }
203
+ for (int i = 0; i < re->nsub(); i++) {
204
+ Regexp* newsub = child_args[i];
205
+ newsub->Decref();
206
+ }
207
+ return false;
208
+ }
209
+
210
+ Regexp* CoalesceWalker::Copy(Regexp* re) {
211
+ return re->Incref();
212
+ }
213
+
214
+ Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
215
+ // Should never be called: we use Walk(), not WalkExponential().
216
+ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
217
+ LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
218
+ #endif
219
+ return re->Incref();
220
+ }
221
+
222
+ Regexp* CoalesceWalker::PostVisit(Regexp* re,
223
+ Regexp* parent_arg,
224
+ Regexp* pre_arg,
225
+ Regexp** child_args,
226
+ int nchild_args) {
227
+ if (re->nsub() == 0)
228
+ return re->Incref();
229
+
230
+ if (re->op() != kRegexpConcat) {
231
+ if (!ChildArgsChanged(re, child_args))
232
+ return re->Incref();
233
+
234
+ // Something changed. Build a new op.
235
+ Regexp* nre = new Regexp(re->op(), re->parse_flags());
236
+ nre->AllocSub(re->nsub());
237
+ Regexp** nre_subs = nre->sub();
238
+ for (int i = 0; i < re->nsub(); i++)
239
+ nre_subs[i] = child_args[i];
240
+ // Repeats and Captures have additional data that must be copied.
241
+ if (re->op() == kRegexpRepeat) {
242
+ nre->min_ = re->min();
243
+ nre->max_ = re->max();
244
+ } else if (re->op() == kRegexpCapture) {
245
+ nre->cap_ = re->cap();
246
+ }
247
+ return nre;
248
+ }
249
+
250
+ bool can_coalesce = false;
251
+ for (int i = 0; i < re->nsub(); i++) {
252
+ if (i+1 < re->nsub() &&
253
+ CanCoalesce(child_args[i], child_args[i+1])) {
254
+ can_coalesce = true;
255
+ break;
256
+ }
257
+ }
258
+ if (!can_coalesce) {
259
+ if (!ChildArgsChanged(re, child_args))
260
+ return re->Incref();
261
+
262
+ // Something changed. Build a new op.
263
+ Regexp* nre = new Regexp(re->op(), re->parse_flags());
264
+ nre->AllocSub(re->nsub());
265
+ Regexp** nre_subs = nre->sub();
266
+ for (int i = 0; i < re->nsub(); i++)
267
+ nre_subs[i] = child_args[i];
268
+ return nre;
269
+ }
270
+
271
+ for (int i = 0; i < re->nsub(); i++) {
272
+ if (i+1 < re->nsub() &&
273
+ CanCoalesce(child_args[i], child_args[i+1]))
274
+ DoCoalesce(&child_args[i], &child_args[i+1]);
275
+ }
276
+ // Determine how many empty matches were left by DoCoalesce.
277
+ int n = 0;
278
+ for (int i = n; i < re->nsub(); i++) {
279
+ if (child_args[i]->op() == kRegexpEmptyMatch)
280
+ n++;
281
+ }
282
+ // Build a new op.
283
+ Regexp* nre = new Regexp(re->op(), re->parse_flags());
284
+ nre->AllocSub(re->nsub() - n);
285
+ Regexp** nre_subs = nre->sub();
286
+ for (int i = 0, j = 0; i < re->nsub(); i++) {
287
+ if (child_args[i]->op() == kRegexpEmptyMatch) {
288
+ child_args[i]->Decref();
289
+ continue;
290
+ }
291
+ nre_subs[j] = child_args[i];
292
+ j++;
293
+ }
294
+ return nre;
295
+ }
296
+
297
+ bool CoalesceWalker::CanCoalesce(Regexp* r1, Regexp* r2) {
298
+ // r1 must be a star/plus/quest/repeat of a literal, char class, any char or
299
+ // any byte.
300
+ if ((r1->op() == kRegexpStar ||
301
+ r1->op() == kRegexpPlus ||
302
+ r1->op() == kRegexpQuest ||
303
+ r1->op() == kRegexpRepeat) &&
304
+ (r1->sub()[0]->op() == kRegexpLiteral ||
305
+ r1->sub()[0]->op() == kRegexpCharClass ||
306
+ r1->sub()[0]->op() == kRegexpAnyChar ||
307
+ r1->sub()[0]->op() == kRegexpAnyByte)) {
308
+ // r2 must be a star/plus/quest/repeat of the same literal, char class,
309
+ // any char or any byte.
310
+ if ((r2->op() == kRegexpStar ||
311
+ r2->op() == kRegexpPlus ||
312
+ r2->op() == kRegexpQuest ||
313
+ r2->op() == kRegexpRepeat) &&
314
+ Regexp::Equal(r1->sub()[0], r2->sub()[0]) &&
315
+ // The parse flags must be consistent.
316
+ ((r1->parse_flags() & Regexp::NonGreedy) ==
317
+ (r2->parse_flags() & Regexp::NonGreedy))) {
318
+ return true;
319
+ }
320
+ // ... OR an occurrence of that literal, char class, any char or any byte
321
+ if (Regexp::Equal(r1->sub()[0], r2)) {
322
+ return true;
323
+ }
324
+ // ... OR a literal string that begins with that literal.
325
+ if (r1->sub()[0]->op() == kRegexpLiteral &&
326
+ r2->op() == kRegexpLiteralString &&
327
+ r2->runes()[0] == r1->sub()[0]->rune() &&
328
+ // The parse flags must be consistent.
329
+ ((r1->sub()[0]->parse_flags() & Regexp::FoldCase) ==
330
+ (r2->parse_flags() & Regexp::FoldCase))) {
331
+ return true;
332
+ }
333
+ }
334
+ return false;
335
+ }
336
+
337
+ void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
338
+ Regexp* r1 = *r1ptr;
339
+ Regexp* r2 = *r2ptr;
340
+
341
+ Regexp* nre = Regexp::Repeat(
342
+ r1->sub()[0]->Incref(), r1->parse_flags(), 0, 0);
343
+
344
+ switch (r1->op()) {
345
+ case kRegexpStar:
346
+ nre->min_ = 0;
347
+ nre->max_ = -1;
348
+ break;
349
+
350
+ case kRegexpPlus:
351
+ nre->min_ = 1;
352
+ nre->max_ = -1;
353
+ break;
354
+
355
+ case kRegexpQuest:
356
+ nre->min_ = 0;
357
+ nre->max_ = 1;
358
+ break;
359
+
360
+ case kRegexpRepeat:
361
+ nre->min_ = r1->min();
362
+ nre->max_ = r1->max();
363
+ break;
364
+
365
+ default:
366
+ LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
367
+ nre->Decref();
368
+ return;
369
+ }
370
+
371
+ switch (r2->op()) {
372
+ case kRegexpStar:
373
+ nre->max_ = -1;
374
+ goto LeaveEmpty;
375
+
376
+ case kRegexpPlus:
377
+ nre->min_++;
378
+ nre->max_ = -1;
379
+ goto LeaveEmpty;
380
+
381
+ case kRegexpQuest:
382
+ if (nre->max() != -1)
383
+ nre->max_++;
384
+ goto LeaveEmpty;
385
+
386
+ case kRegexpRepeat:
387
+ nre->min_ += r2->min();
388
+ if (r2->max() == -1)
389
+ nre->max_ = -1;
390
+ else if (nre->max() != -1)
391
+ nre->max_ += r2->max();
392
+ goto LeaveEmpty;
393
+
394
+ case kRegexpLiteral:
395
+ case kRegexpCharClass:
396
+ case kRegexpAnyChar:
397
+ case kRegexpAnyByte:
398
+ nre->min_++;
399
+ if (nre->max() != -1)
400
+ nre->max_++;
401
+ goto LeaveEmpty;
402
+
403
+ LeaveEmpty:
404
+ *r1ptr = new Regexp(kRegexpEmptyMatch, Regexp::NoParseFlags);
405
+ *r2ptr = nre;
406
+ break;
407
+
408
+ case kRegexpLiteralString: {
409
+ Rune r = r1->sub()[0]->rune();
410
+ // Determine how much of the literal string is removed.
411
+ // We know that we have at least one rune. :)
412
+ int n = 1;
413
+ while (n < r2->nrunes() && r2->runes()[n] == r)
414
+ n++;
415
+ nre->min_ += n;
416
+ if (nre->max() != -1)
417
+ nre->max_ += n;
418
+ if (n == r2->nrunes())
419
+ goto LeaveEmpty;
420
+ *r1ptr = nre;
421
+ *r2ptr = Regexp::LiteralString(
422
+ &r2->runes()[n], r2->nrunes() - n, r2->parse_flags());
423
+ break;
424
+ }
425
+
426
+ default:
427
+ LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
428
+ nre->Decref();
429
+ return;
430
+ }
431
+
432
+ r1->Decref();
433
+ r2->Decref();
434
+ }
435
+
436
+ Regexp* SimplifyWalker::Copy(Regexp* re) {
437
+ return re->Incref();
438
+ }
439
+
440
+ Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
441
+ // Should never be called: we use Walk(), not WalkExponential().
442
+ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
443
+ LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
444
+ #endif
445
+ return re->Incref();
446
+ }
447
+
448
+ Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) {
449
+ if (re->simple()) {
450
+ *stop = true;
451
+ return re->Incref();
452
+ }
453
+ return NULL;
454
+ }
455
+
456
+ Regexp* SimplifyWalker::PostVisit(Regexp* re,
457
+ Regexp* parent_arg,
458
+ Regexp* pre_arg,
459
+ Regexp** child_args,
460
+ int nchild_args) {
461
+ switch (re->op()) {
462
+ case kRegexpNoMatch:
463
+ case kRegexpEmptyMatch:
464
+ case kRegexpLiteral:
465
+ case kRegexpLiteralString:
466
+ case kRegexpBeginLine:
467
+ case kRegexpEndLine:
468
+ case kRegexpBeginText:
469
+ case kRegexpWordBoundary:
470
+ case kRegexpNoWordBoundary:
471
+ case kRegexpEndText:
472
+ case kRegexpAnyChar:
473
+ case kRegexpAnyByte:
474
+ case kRegexpHaveMatch:
475
+ // All these are always simple.
476
+ re->simple_ = true;
477
+ return re->Incref();
478
+
479
+ case kRegexpConcat:
480
+ case kRegexpAlternate: {
481
+ // These are simple as long as the subpieces are simple.
482
+ if (!ChildArgsChanged(re, child_args)) {
483
+ re->simple_ = true;
484
+ return re->Incref();
485
+ }
486
+ Regexp* nre = new Regexp(re->op(), re->parse_flags());
487
+ nre->AllocSub(re->nsub());
488
+ Regexp** nre_subs = nre->sub();
489
+ for (int i = 0; i < re->nsub(); i++)
490
+ nre_subs[i] = child_args[i];
491
+ nre->simple_ = true;
492
+ return nre;
493
+ }
494
+
495
+ case kRegexpCapture: {
496
+ Regexp* newsub = child_args[0];
497
+ if (newsub == re->sub()[0]) {
498
+ newsub->Decref();
499
+ re->simple_ = true;
500
+ return re->Incref();
501
+ }
502
+ Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags());
503
+ nre->AllocSub(1);
504
+ nre->sub()[0] = newsub;
505
+ nre->cap_ = re->cap();
506
+ nre->simple_ = true;
507
+ return nre;
508
+ }
509
+
510
+ case kRegexpStar:
511
+ case kRegexpPlus:
512
+ case kRegexpQuest: {
513
+ Regexp* newsub = child_args[0];
514
+ // Special case: repeat the empty string as much as
515
+ // you want, but it's still the empty string.
516
+ if (newsub->op() == kRegexpEmptyMatch)
517
+ return newsub;
518
+
519
+ // These are simple as long as the subpiece is simple.
520
+ if (newsub == re->sub()[0]) {
521
+ newsub->Decref();
522
+ re->simple_ = true;
523
+ return re->Incref();
524
+ }
525
+
526
+ // These are also idempotent if flags are constant.
527
+ if (re->op() == newsub->op() &&
528
+ re->parse_flags() == newsub->parse_flags())
529
+ return newsub;
530
+
531
+ Regexp* nre = new Regexp(re->op(), re->parse_flags());
532
+ nre->AllocSub(1);
533
+ nre->sub()[0] = newsub;
534
+ nre->simple_ = true;
535
+ return nre;
536
+ }
537
+
538
+ case kRegexpRepeat: {
539
+ Regexp* newsub = child_args[0];
540
+ // Special case: repeat the empty string as much as
541
+ // you want, but it's still the empty string.
542
+ if (newsub->op() == kRegexpEmptyMatch)
543
+ return newsub;
544
+
545
+ Regexp* nre = SimplifyRepeat(newsub, re->min_, re->max_,
546
+ re->parse_flags());
547
+ newsub->Decref();
548
+ nre->simple_ = true;
549
+ return nre;
550
+ }
551
+
552
+ case kRegexpCharClass: {
553
+ Regexp* nre = SimplifyCharClass(re);
554
+ nre->simple_ = true;
555
+ return nre;
556
+ }
557
+ }
558
+
559
+ LOG(ERROR) << "Simplify case not handled: " << re->op();
560
+ return re->Incref();
561
+ }
562
+
563
+ // Creates a concatenation of two Regexp, consuming refs to re1 and re2.
564
+ // Returns a new Regexp, handing the ref to the caller.
565
+ Regexp* SimplifyWalker::Concat2(Regexp* re1, Regexp* re2,
566
+ Regexp::ParseFlags parse_flags) {
567
+ Regexp* re = new Regexp(kRegexpConcat, parse_flags);
568
+ re->AllocSub(2);
569
+ Regexp** subs = re->sub();
570
+ subs[0] = re1;
571
+ subs[1] = re2;
572
+ return re;
573
+ }
574
+
575
+ // Simplifies the expression re{min,max} in terms of *, +, and ?.
576
+ // Returns a new regexp. Does not edit re. Does not consume reference to re.
577
+ // Caller must Decref return value when done with it.
578
+ // The result will *not* necessarily have the right capturing parens
579
+ // if you call ToString() and re-parse it: (x){2} becomes (x)(x),
580
+ // but in the Regexp* representation, both (x) are marked as $1.
581
+ Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
582
+ Regexp::ParseFlags f) {
583
+ // x{n,} means at least n matches of x.
584
+ if (max == -1) {
585
+ // Special case: x{0,} is x*
586
+ if (min == 0)
587
+ return Regexp::Star(re->Incref(), f);
588
+
589
+ // Special case: x{1,} is x+
590
+ if (min == 1)
591
+ return Regexp::Plus(re->Incref(), f);
592
+
593
+ // General case: x{4,} is xxxx+
594
+ PODArray<Regexp*> nre_subs(min);
595
+ for (int i = 0; i < min-1; i++)
596
+ nre_subs[i] = re->Incref();
597
+ nre_subs[min-1] = Regexp::Plus(re->Incref(), f);
598
+ return Regexp::Concat(nre_subs.data(), min, f);
599
+ }
600
+
601
+ // Special case: (x){0} matches only empty string.
602
+ if (min == 0 && max == 0)
603
+ return new Regexp(kRegexpEmptyMatch, f);
604
+
605
+ // Special case: x{1} is just x.
606
+ if (min == 1 && max == 1)
607
+ return re->Incref();
608
+
609
+ // General case: x{n,m} means n copies of x and m copies of x?.
610
+ // The machine will do less work if we nest the final m copies,
611
+ // so that x{2,5} = xx(x(x(x)?)?)?
612
+
613
+ // Build leading prefix: xx. Capturing only on the last one.
614
+ Regexp* nre = NULL;
615
+ if (min > 0) {
616
+ PODArray<Regexp*> nre_subs(min);
617
+ for (int i = 0; i < min; i++)
618
+ nre_subs[i] = re->Incref();
619
+ nre = Regexp::Concat(nre_subs.data(), min, f);
620
+ }
621
+
622
+ // Build and attach suffix: (x(x(x)?)?)?
623
+ if (max > min) {
624
+ Regexp* suf = Regexp::Quest(re->Incref(), f);
625
+ for (int i = min+1; i < max; i++)
626
+ suf = Regexp::Quest(Concat2(re->Incref(), suf, f), f);
627
+ if (nre == NULL)
628
+ nre = suf;
629
+ else
630
+ nre = Concat2(nre, suf, f);
631
+ }
632
+
633
+ if (nre == NULL) {
634
+ // Some degenerate case, like min > max, or min < max < 0.
635
+ // This shouldn't happen, because the parser rejects such regexps.
636
+ LOG(DFATAL) << "Malformed repeat " << re->ToString() << " " << min << " " << max;
637
+ return new Regexp(kRegexpNoMatch, f);
638
+ }
639
+
640
+ return nre;
641
+ }
642
+
643
+ // Simplifies a character class.
644
+ // Caller must Decref return value when done with it.
645
+ Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) {
646
+ CharClass* cc = re->cc();
647
+
648
+ // Special cases
649
+ if (cc->empty())
650
+ return new Regexp(kRegexpNoMatch, re->parse_flags());
651
+ if (cc->full())
652
+ return new Regexp(kRegexpAnyChar, re->parse_flags());
653
+
654
+ return re->Incref();
655
+ }
656
+
657
+ } // namespace re2