grpc 1.31.0.pre1 → 1.31.0.pre2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of grpc might be problematic. Click here for more details.

Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/Makefile +2 -2
  3. data/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc +3 -4
  4. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_routing.cc +5 -4
  5. data/src/ruby/lib/grpc/version.rb +1 -1
  6. data/third_party/re2/re2/bitmap256.h +117 -0
  7. data/third_party/re2/re2/bitstate.cc +385 -0
  8. data/third_party/re2/re2/compile.cc +1279 -0
  9. data/third_party/re2/re2/dfa.cc +2130 -0
  10. data/third_party/re2/re2/filtered_re2.cc +121 -0
  11. data/third_party/re2/re2/filtered_re2.h +109 -0
  12. data/third_party/re2/re2/mimics_pcre.cc +197 -0
  13. data/third_party/re2/re2/nfa.cc +713 -0
  14. data/third_party/re2/re2/onepass.cc +623 -0
  15. data/third_party/re2/re2/parse.cc +2464 -0
  16. data/third_party/re2/re2/perl_groups.cc +119 -0
  17. data/third_party/re2/re2/pod_array.h +55 -0
  18. data/third_party/re2/re2/prefilter.cc +710 -0
  19. data/third_party/re2/re2/prefilter.h +108 -0
  20. data/third_party/re2/re2/prefilter_tree.cc +407 -0
  21. data/third_party/re2/re2/prefilter_tree.h +139 -0
  22. data/third_party/re2/re2/prog.cc +988 -0
  23. data/third_party/re2/re2/prog.h +436 -0
  24. data/third_party/re2/re2/re2.cc +1362 -0
  25. data/third_party/re2/re2/re2.h +1002 -0
  26. data/third_party/re2/re2/regexp.cc +980 -0
  27. data/third_party/re2/re2/regexp.h +659 -0
  28. data/third_party/re2/re2/set.cc +154 -0
  29. data/third_party/re2/re2/set.h +80 -0
  30. data/third_party/re2/re2/simplify.cc +657 -0
  31. data/third_party/re2/re2/sparse_array.h +392 -0
  32. data/third_party/re2/re2/sparse_set.h +264 -0
  33. data/third_party/re2/re2/stringpiece.cc +65 -0
  34. data/third_party/re2/re2/stringpiece.h +210 -0
  35. data/third_party/re2/re2/tostring.cc +351 -0
  36. data/third_party/re2/re2/unicode_casefold.cc +582 -0
  37. data/third_party/re2/re2/unicode_casefold.h +78 -0
  38. data/third_party/re2/re2/unicode_groups.cc +6269 -0
  39. data/third_party/re2/re2/unicode_groups.h +67 -0
  40. data/third_party/re2/re2/walker-inl.h +246 -0
  41. data/third_party/re2/util/benchmark.h +156 -0
  42. data/third_party/re2/util/flags.h +26 -0
  43. data/third_party/re2/util/logging.h +109 -0
  44. data/third_party/re2/util/malloc_counter.h +19 -0
  45. data/third_party/re2/util/mix.h +41 -0
  46. data/third_party/re2/util/mutex.h +148 -0
  47. data/third_party/re2/util/pcre.cc +1025 -0
  48. data/third_party/re2/util/pcre.h +681 -0
  49. data/third_party/re2/util/rune.cc +260 -0
  50. data/third_party/re2/util/strutil.cc +149 -0
  51. data/third_party/re2/util/strutil.h +21 -0
  52. data/third_party/re2/util/test.h +50 -0
  53. data/third_party/re2/util/utf.h +44 -0
  54. data/third_party/re2/util/util.h +42 -0
  55. metadata +78 -29
@@ -0,0 +1,980 @@
1
+ // Copyright 2006 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ // Regular expression representation.
6
+ // Tested by parse_test.cc
7
+
8
+ #include "re2/regexp.h"
9
+
10
+ #include <stddef.h>
11
+ #include <stdint.h>
12
+ #include <string.h>
13
+ #include <algorithm>
14
+ #include <map>
15
+ #include <mutex>
16
+ #include <string>
17
+ #include <vector>
18
+
19
+ #include "util/util.h"
20
+ #include "util/logging.h"
21
+ #include "util/mutex.h"
22
+ #include "util/utf.h"
23
+ #include "re2/pod_array.h"
24
+ #include "re2/stringpiece.h"
25
+ #include "re2/walker-inl.h"
26
+
27
+ namespace re2 {
28
+
29
+ // Constructor. Allocates vectors as appropriate for operator.
30
+ Regexp::Regexp(RegexpOp op, ParseFlags parse_flags)
31
+ : op_(static_cast<uint8_t>(op)),
32
+ simple_(false),
33
+ parse_flags_(static_cast<uint16_t>(parse_flags)),
34
+ ref_(1),
35
+ nsub_(0),
36
+ down_(NULL) {
37
+ subone_ = NULL;
38
+ memset(the_union_, 0, sizeof the_union_);
39
+ }
40
+
41
+ // Destructor. Assumes already cleaned up children.
42
+ // Private: use Decref() instead of delete to destroy Regexps.
43
+ // Can't call Decref on the sub-Regexps here because
44
+ // that could cause arbitrarily deep recursion, so
45
+ // required Decref() to have handled them for us.
46
+ Regexp::~Regexp() {
47
+ if (nsub_ > 0)
48
+ LOG(DFATAL) << "Regexp not destroyed.";
49
+
50
+ switch (op_) {
51
+ default:
52
+ break;
53
+ case kRegexpCapture:
54
+ delete name_;
55
+ break;
56
+ case kRegexpLiteralString:
57
+ delete[] runes_;
58
+ break;
59
+ case kRegexpCharClass:
60
+ if (cc_)
61
+ cc_->Delete();
62
+ delete ccb_;
63
+ break;
64
+ }
65
+ }
66
+
67
+ // If it's possible to destroy this regexp without recurring,
68
+ // do so and return true. Else return false.
69
+ bool Regexp::QuickDestroy() {
70
+ if (nsub_ == 0) {
71
+ delete this;
72
+ return true;
73
+ }
74
+ return false;
75
+ }
76
+
77
+ // Lazily allocated.
78
+ static Mutex* ref_mutex;
79
+ static std::map<Regexp*, int>* ref_map;
80
+
81
+ int Regexp::Ref() {
82
+ if (ref_ < kMaxRef)
83
+ return ref_;
84
+
85
+ MutexLock l(ref_mutex);
86
+ return (*ref_map)[this];
87
+ }
88
+
89
+ // Increments reference count, returns object as convenience.
90
+ Regexp* Regexp::Incref() {
91
+ if (ref_ >= kMaxRef-1) {
92
+ static std::once_flag ref_once;
93
+ std::call_once(ref_once, []() {
94
+ ref_mutex = new Mutex;
95
+ ref_map = new std::map<Regexp*, int>;
96
+ });
97
+
98
+ // Store ref count in overflow map.
99
+ MutexLock l(ref_mutex);
100
+ if (ref_ == kMaxRef) {
101
+ // already overflowed
102
+ (*ref_map)[this]++;
103
+ } else {
104
+ // overflowing now
105
+ (*ref_map)[this] = kMaxRef;
106
+ ref_ = kMaxRef;
107
+ }
108
+ return this;
109
+ }
110
+
111
+ ref_++;
112
+ return this;
113
+ }
114
+
115
+ // Decrements reference count and deletes this object if count reaches 0.
116
+ void Regexp::Decref() {
117
+ if (ref_ == kMaxRef) {
118
+ // Ref count is stored in overflow map.
119
+ MutexLock l(ref_mutex);
120
+ int r = (*ref_map)[this] - 1;
121
+ if (r < kMaxRef) {
122
+ ref_ = static_cast<uint16_t>(r);
123
+ ref_map->erase(this);
124
+ } else {
125
+ (*ref_map)[this] = r;
126
+ }
127
+ return;
128
+ }
129
+ ref_--;
130
+ if (ref_ == 0)
131
+ Destroy();
132
+ }
133
+
134
+ // Deletes this object; ref count has count reached 0.
135
+ void Regexp::Destroy() {
136
+ if (QuickDestroy())
137
+ return;
138
+
139
+ // Handle recursive Destroy with explicit stack
140
+ // to avoid arbitrarily deep recursion on process stack [sigh].
141
+ down_ = NULL;
142
+ Regexp* stack = this;
143
+ while (stack != NULL) {
144
+ Regexp* re = stack;
145
+ stack = re->down_;
146
+ if (re->ref_ != 0)
147
+ LOG(DFATAL) << "Bad reference count " << re->ref_;
148
+ if (re->nsub_ > 0) {
149
+ Regexp** subs = re->sub();
150
+ for (int i = 0; i < re->nsub_; i++) {
151
+ Regexp* sub = subs[i];
152
+ if (sub == NULL)
153
+ continue;
154
+ if (sub->ref_ == kMaxRef)
155
+ sub->Decref();
156
+ else
157
+ --sub->ref_;
158
+ if (sub->ref_ == 0 && !sub->QuickDestroy()) {
159
+ sub->down_ = stack;
160
+ stack = sub;
161
+ }
162
+ }
163
+ if (re->nsub_ > 1)
164
+ delete[] subs;
165
+ re->nsub_ = 0;
166
+ }
167
+ delete re;
168
+ }
169
+ }
170
+
171
+ void Regexp::AddRuneToString(Rune r) {
172
+ DCHECK(op_ == kRegexpLiteralString);
173
+ if (nrunes_ == 0) {
174
+ // start with 8
175
+ runes_ = new Rune[8];
176
+ } else if (nrunes_ >= 8 && (nrunes_ & (nrunes_ - 1)) == 0) {
177
+ // double on powers of two
178
+ Rune *old = runes_;
179
+ runes_ = new Rune[nrunes_ * 2];
180
+ for (int i = 0; i < nrunes_; i++)
181
+ runes_[i] = old[i];
182
+ delete[] old;
183
+ }
184
+
185
+ runes_[nrunes_++] = r;
186
+ }
187
+
188
+ Regexp* Regexp::HaveMatch(int match_id, ParseFlags flags) {
189
+ Regexp* re = new Regexp(kRegexpHaveMatch, flags);
190
+ re->match_id_ = match_id;
191
+ return re;
192
+ }
193
+
194
+ Regexp* Regexp::StarPlusOrQuest(RegexpOp op, Regexp* sub, ParseFlags flags) {
195
+ // Squash **, ++ and ??.
196
+ if (op == sub->op() && flags == sub->parse_flags())
197
+ return sub;
198
+
199
+ // Squash *+, *?, +*, +?, ?* and ?+. They all squash to *, so because
200
+ // op is Star/Plus/Quest, we just have to check that sub->op() is too.
201
+ if ((sub->op() == kRegexpStar ||
202
+ sub->op() == kRegexpPlus ||
203
+ sub->op() == kRegexpQuest) &&
204
+ flags == sub->parse_flags()) {
205
+ // If sub is Star, no need to rewrite it.
206
+ if (sub->op() == kRegexpStar)
207
+ return sub;
208
+
209
+ // Rewrite sub to Star.
210
+ Regexp* re = new Regexp(kRegexpStar, flags);
211
+ re->AllocSub(1);
212
+ re->sub()[0] = sub->sub()[0]->Incref();
213
+ sub->Decref(); // We didn't consume the reference after all.
214
+ return re;
215
+ }
216
+
217
+ Regexp* re = new Regexp(op, flags);
218
+ re->AllocSub(1);
219
+ re->sub()[0] = sub;
220
+ return re;
221
+ }
222
+
223
+ Regexp* Regexp::Plus(Regexp* sub, ParseFlags flags) {
224
+ return StarPlusOrQuest(kRegexpPlus, sub, flags);
225
+ }
226
+
227
+ Regexp* Regexp::Star(Regexp* sub, ParseFlags flags) {
228
+ return StarPlusOrQuest(kRegexpStar, sub, flags);
229
+ }
230
+
231
+ Regexp* Regexp::Quest(Regexp* sub, ParseFlags flags) {
232
+ return StarPlusOrQuest(kRegexpQuest, sub, flags);
233
+ }
234
+
235
+ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
236
+ ParseFlags flags, bool can_factor) {
237
+ if (nsub == 1)
238
+ return sub[0];
239
+
240
+ if (nsub == 0) {
241
+ if (op == kRegexpAlternate)
242
+ return new Regexp(kRegexpNoMatch, flags);
243
+ else
244
+ return new Regexp(kRegexpEmptyMatch, flags);
245
+ }
246
+
247
+ PODArray<Regexp*> subcopy;
248
+ if (op == kRegexpAlternate && can_factor) {
249
+ // Going to edit sub; make a copy so we don't step on caller.
250
+ subcopy = PODArray<Regexp*>(nsub);
251
+ memmove(subcopy.data(), sub, nsub * sizeof sub[0]);
252
+ sub = subcopy.data();
253
+ nsub = FactorAlternation(sub, nsub, flags);
254
+ if (nsub == 1) {
255
+ Regexp* re = sub[0];
256
+ return re;
257
+ }
258
+ }
259
+
260
+ if (nsub > kMaxNsub) {
261
+ // Too many subexpressions to fit in a single Regexp.
262
+ // Make a two-level tree. Two levels gets us to 65535^2.
263
+ int nbigsub = (nsub+kMaxNsub-1)/kMaxNsub;
264
+ Regexp* re = new Regexp(op, flags);
265
+ re->AllocSub(nbigsub);
266
+ Regexp** subs = re->sub();
267
+ for (int i = 0; i < nbigsub - 1; i++)
268
+ subs[i] = ConcatOrAlternate(op, sub+i*kMaxNsub, kMaxNsub, flags, false);
269
+ subs[nbigsub - 1] = ConcatOrAlternate(op, sub+(nbigsub-1)*kMaxNsub,
270
+ nsub - (nbigsub-1)*kMaxNsub, flags,
271
+ false);
272
+ return re;
273
+ }
274
+
275
+ Regexp* re = new Regexp(op, flags);
276
+ re->AllocSub(nsub);
277
+ Regexp** subs = re->sub();
278
+ for (int i = 0; i < nsub; i++)
279
+ subs[i] = sub[i];
280
+ return re;
281
+ }
282
+
283
+ Regexp* Regexp::Concat(Regexp** sub, int nsub, ParseFlags flags) {
284
+ return ConcatOrAlternate(kRegexpConcat, sub, nsub, flags, false);
285
+ }
286
+
287
+ Regexp* Regexp::Alternate(Regexp** sub, int nsub, ParseFlags flags) {
288
+ return ConcatOrAlternate(kRegexpAlternate, sub, nsub, flags, true);
289
+ }
290
+
291
+ Regexp* Regexp::AlternateNoFactor(Regexp** sub, int nsub, ParseFlags flags) {
292
+ return ConcatOrAlternate(kRegexpAlternate, sub, nsub, flags, false);
293
+ }
294
+
295
+ Regexp* Regexp::Capture(Regexp* sub, ParseFlags flags, int cap) {
296
+ Regexp* re = new Regexp(kRegexpCapture, flags);
297
+ re->AllocSub(1);
298
+ re->sub()[0] = sub;
299
+ re->cap_ = cap;
300
+ return re;
301
+ }
302
+
303
+ Regexp* Regexp::Repeat(Regexp* sub, ParseFlags flags, int min, int max) {
304
+ Regexp* re = new Regexp(kRegexpRepeat, flags);
305
+ re->AllocSub(1);
306
+ re->sub()[0] = sub;
307
+ re->min_ = min;
308
+ re->max_ = max;
309
+ return re;
310
+ }
311
+
312
+ Regexp* Regexp::NewLiteral(Rune rune, ParseFlags flags) {
313
+ Regexp* re = new Regexp(kRegexpLiteral, flags);
314
+ re->rune_ = rune;
315
+ return re;
316
+ }
317
+
318
+ Regexp* Regexp::LiteralString(Rune* runes, int nrunes, ParseFlags flags) {
319
+ if (nrunes <= 0)
320
+ return new Regexp(kRegexpEmptyMatch, flags);
321
+ if (nrunes == 1)
322
+ return NewLiteral(runes[0], flags);
323
+ Regexp* re = new Regexp(kRegexpLiteralString, flags);
324
+ for (int i = 0; i < nrunes; i++)
325
+ re->AddRuneToString(runes[i]);
326
+ return re;
327
+ }
328
+
329
+ Regexp* Regexp::NewCharClass(CharClass* cc, ParseFlags flags) {
330
+ Regexp* re = new Regexp(kRegexpCharClass, flags);
331
+ re->cc_ = cc;
332
+ return re;
333
+ }
334
+
335
+ void Regexp::Swap(Regexp* that) {
336
+ // Regexp is not trivially copyable, so we cannot freely copy it with
337
+ // memmove(3), but swapping objects like so is safe for our purposes.
338
+ char tmp[sizeof *this];
339
+ void* vthis = reinterpret_cast<void*>(this);
340
+ void* vthat = reinterpret_cast<void*>(that);
341
+ memmove(tmp, vthis, sizeof *this);
342
+ memmove(vthis, vthat, sizeof *this);
343
+ memmove(vthat, tmp, sizeof *this);
344
+ }
345
+
346
+ // Tests equality of all top-level structure but not subregexps.
347
+ static bool TopEqual(Regexp* a, Regexp* b) {
348
+ if (a->op() != b->op())
349
+ return false;
350
+
351
+ switch (a->op()) {
352
+ case kRegexpNoMatch:
353
+ case kRegexpEmptyMatch:
354
+ case kRegexpAnyChar:
355
+ case kRegexpAnyByte:
356
+ case kRegexpBeginLine:
357
+ case kRegexpEndLine:
358
+ case kRegexpWordBoundary:
359
+ case kRegexpNoWordBoundary:
360
+ case kRegexpBeginText:
361
+ return true;
362
+
363
+ case kRegexpEndText:
364
+ // The parse flags remember whether it's \z or (?-m:$),
365
+ // which matters when testing against PCRE.
366
+ return ((a->parse_flags() ^ b->parse_flags()) & Regexp::WasDollar) == 0;
367
+
368
+ case kRegexpLiteral:
369
+ return a->rune() == b->rune() &&
370
+ ((a->parse_flags() ^ b->parse_flags()) & Regexp::FoldCase) == 0;
371
+
372
+ case kRegexpLiteralString:
373
+ return a->nrunes() == b->nrunes() &&
374
+ ((a->parse_flags() ^ b->parse_flags()) & Regexp::FoldCase) == 0 &&
375
+ memcmp(a->runes(), b->runes(),
376
+ a->nrunes() * sizeof a->runes()[0]) == 0;
377
+
378
+ case kRegexpAlternate:
379
+ case kRegexpConcat:
380
+ return a->nsub() == b->nsub();
381
+
382
+ case kRegexpStar:
383
+ case kRegexpPlus:
384
+ case kRegexpQuest:
385
+ return ((a->parse_flags() ^ b->parse_flags()) & Regexp::NonGreedy) == 0;
386
+
387
+ case kRegexpRepeat:
388
+ return ((a->parse_flags() ^ b->parse_flags()) & Regexp::NonGreedy) == 0 &&
389
+ a->min() == b->min() &&
390
+ a->max() == b->max();
391
+
392
+ case kRegexpCapture:
393
+ return a->cap() == b->cap() && a->name() == b->name();
394
+
395
+ case kRegexpHaveMatch:
396
+ return a->match_id() == b->match_id();
397
+
398
+ case kRegexpCharClass: {
399
+ CharClass* acc = a->cc();
400
+ CharClass* bcc = b->cc();
401
+ return acc->size() == bcc->size() &&
402
+ acc->end() - acc->begin() == bcc->end() - bcc->begin() &&
403
+ memcmp(acc->begin(), bcc->begin(),
404
+ (acc->end() - acc->begin()) * sizeof acc->begin()[0]) == 0;
405
+ }
406
+ }
407
+
408
+ LOG(DFATAL) << "Unexpected op in Regexp::Equal: " << a->op();
409
+ return 0;
410
+ }
411
+
412
+ bool Regexp::Equal(Regexp* a, Regexp* b) {
413
+ if (a == NULL || b == NULL)
414
+ return a == b;
415
+
416
+ if (!TopEqual(a, b))
417
+ return false;
418
+
419
+ // Fast path:
420
+ // return without allocating vector if there are no subregexps.
421
+ switch (a->op()) {
422
+ case kRegexpAlternate:
423
+ case kRegexpConcat:
424
+ case kRegexpStar:
425
+ case kRegexpPlus:
426
+ case kRegexpQuest:
427
+ case kRegexpRepeat:
428
+ case kRegexpCapture:
429
+ break;
430
+
431
+ default:
432
+ return true;
433
+ }
434
+
435
+ // Committed to doing real work.
436
+ // The stack (vector) has pairs of regexps waiting to
437
+ // be compared. The regexps are only equal if
438
+ // all the pairs end up being equal.
439
+ std::vector<Regexp*> stk;
440
+
441
+ for (;;) {
442
+ // Invariant: TopEqual(a, b) == true.
443
+ Regexp* a2;
444
+ Regexp* b2;
445
+ switch (a->op()) {
446
+ default:
447
+ break;
448
+ case kRegexpAlternate:
449
+ case kRegexpConcat:
450
+ for (int i = 0; i < a->nsub(); i++) {
451
+ a2 = a->sub()[i];
452
+ b2 = b->sub()[i];
453
+ if (!TopEqual(a2, b2))
454
+ return false;
455
+ stk.push_back(a2);
456
+ stk.push_back(b2);
457
+ }
458
+ break;
459
+
460
+ case kRegexpStar:
461
+ case kRegexpPlus:
462
+ case kRegexpQuest:
463
+ case kRegexpRepeat:
464
+ case kRegexpCapture:
465
+ a2 = a->sub()[0];
466
+ b2 = b->sub()[0];
467
+ if (!TopEqual(a2, b2))
468
+ return false;
469
+ // Really:
470
+ // stk.push_back(a2);
471
+ // stk.push_back(b2);
472
+ // break;
473
+ // but faster to assign directly and loop.
474
+ a = a2;
475
+ b = b2;
476
+ continue;
477
+ }
478
+
479
+ size_t n = stk.size();
480
+ if (n == 0)
481
+ break;
482
+
483
+ DCHECK_GE(n, 2);
484
+ a = stk[n-2];
485
+ b = stk[n-1];
486
+ stk.resize(n-2);
487
+ }
488
+
489
+ return true;
490
+ }
491
+
492
+ // Keep in sync with enum RegexpStatusCode in regexp.h
493
+ static const char *kErrorStrings[] = {
494
+ "no error",
495
+ "unexpected error",
496
+ "invalid escape sequence",
497
+ "invalid character class",
498
+ "invalid character class range",
499
+ "missing ]",
500
+ "missing )",
501
+ "trailing \\",
502
+ "no argument for repetition operator",
503
+ "invalid repetition size",
504
+ "bad repetition operator",
505
+ "invalid perl operator",
506
+ "invalid UTF-8",
507
+ "invalid named capture group",
508
+ };
509
+
510
+ std::string RegexpStatus::CodeText(enum RegexpStatusCode code) {
511
+ if (code < 0 || code >= arraysize(kErrorStrings))
512
+ code = kRegexpInternalError;
513
+ return kErrorStrings[code];
514
+ }
515
+
516
+ std::string RegexpStatus::Text() const {
517
+ if (error_arg_.empty())
518
+ return CodeText(code_);
519
+ std::string s;
520
+ s.append(CodeText(code_));
521
+ s.append(": ");
522
+ s.append(error_arg_.data(), error_arg_.size());
523
+ return s;
524
+ }
525
+
526
+ void RegexpStatus::Copy(const RegexpStatus& status) {
527
+ code_ = status.code_;
528
+ error_arg_ = status.error_arg_;
529
+ }
530
+
531
+ typedef int Ignored; // Walker<void> doesn't exist
532
+
533
+ // Walker subclass to count capturing parens in regexp.
534
+ class NumCapturesWalker : public Regexp::Walker<Ignored> {
535
+ public:
536
+ NumCapturesWalker() : ncapture_(0) {}
537
+ int ncapture() { return ncapture_; }
538
+
539
+ virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
540
+ if (re->op() == kRegexpCapture)
541
+ ncapture_++;
542
+ return ignored;
543
+ }
544
+
545
+ virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
546
+ // Should never be called: we use Walk(), not WalkExponential().
547
+ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
548
+ LOG(DFATAL) << "NumCapturesWalker::ShortVisit called";
549
+ #endif
550
+ return ignored;
551
+ }
552
+
553
+ private:
554
+ int ncapture_;
555
+
556
+ NumCapturesWalker(const NumCapturesWalker&) = delete;
557
+ NumCapturesWalker& operator=(const NumCapturesWalker&) = delete;
558
+ };
559
+
560
+ int Regexp::NumCaptures() {
561
+ NumCapturesWalker w;
562
+ w.Walk(this, 0);
563
+ return w.ncapture();
564
+ }
565
+
566
+ // Walker class to build map of named capture groups and their indices.
567
+ class NamedCapturesWalker : public Regexp::Walker<Ignored> {
568
+ public:
569
+ NamedCapturesWalker() : map_(NULL) {}
570
+ ~NamedCapturesWalker() { delete map_; }
571
+
572
+ std::map<std::string, int>* TakeMap() {
573
+ std::map<std::string, int>* m = map_;
574
+ map_ = NULL;
575
+ return m;
576
+ }
577
+
578
+ virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
579
+ if (re->op() == kRegexpCapture && re->name() != NULL) {
580
+ // Allocate map once we find a name.
581
+ if (map_ == NULL)
582
+ map_ = new std::map<std::string, int>;
583
+
584
+ // Record first occurrence of each name.
585
+ // (The rule is that if you have the same name
586
+ // multiple times, only the leftmost one counts.)
587
+ if (map_->find(*re->name()) == map_->end())
588
+ (*map_)[*re->name()] = re->cap();
589
+ }
590
+ return ignored;
591
+ }
592
+
593
+ virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
594
+ // Should never be called: we use Walk(), not WalkExponential().
595
+ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
596
+ LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called";
597
+ #endif
598
+ return ignored;
599
+ }
600
+
601
+ private:
602
+ std::map<std::string, int>* map_;
603
+
604
+ NamedCapturesWalker(const NamedCapturesWalker&) = delete;
605
+ NamedCapturesWalker& operator=(const NamedCapturesWalker&) = delete;
606
+ };
607
+
608
+ std::map<std::string, int>* Regexp::NamedCaptures() {
609
+ NamedCapturesWalker w;
610
+ w.Walk(this, 0);
611
+ return w.TakeMap();
612
+ }
613
+
614
+ // Walker class to build map from capture group indices to their names.
615
+ class CaptureNamesWalker : public Regexp::Walker<Ignored> {
616
+ public:
617
+ CaptureNamesWalker() : map_(NULL) {}
618
+ ~CaptureNamesWalker() { delete map_; }
619
+
620
+ std::map<int, std::string>* TakeMap() {
621
+ std::map<int, std::string>* m = map_;
622
+ map_ = NULL;
623
+ return m;
624
+ }
625
+
626
+ virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
627
+ if (re->op() == kRegexpCapture && re->name() != NULL) {
628
+ // Allocate map once we find a name.
629
+ if (map_ == NULL)
630
+ map_ = new std::map<int, std::string>;
631
+
632
+ (*map_)[re->cap()] = *re->name();
633
+ }
634
+ return ignored;
635
+ }
636
+
637
+ virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
638
+ // Should never be called: we use Walk(), not WalkExponential().
639
+ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
640
+ LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called";
641
+ #endif
642
+ return ignored;
643
+ }
644
+
645
+ private:
646
+ std::map<int, std::string>* map_;
647
+
648
+ CaptureNamesWalker(const CaptureNamesWalker&) = delete;
649
+ CaptureNamesWalker& operator=(const CaptureNamesWalker&) = delete;
650
+ };
651
+
652
+ std::map<int, std::string>* Regexp::CaptureNames() {
653
+ CaptureNamesWalker w;
654
+ w.Walk(this, 0);
655
+ return w.TakeMap();
656
+ }
657
+
658
+ void ConvertRunesToBytes(bool latin1, Rune* runes, int nrunes,
659
+ std::string* bytes) {
660
+ if (latin1) {
661
+ bytes->resize(nrunes);
662
+ for (int i = 0; i < nrunes; i++)
663
+ (*bytes)[i] = static_cast<char>(runes[i]);
664
+ } else {
665
+ bytes->resize(nrunes * UTFmax); // worst case
666
+ char* p = &(*bytes)[0];
667
+ for (int i = 0; i < nrunes; i++)
668
+ p += runetochar(p, &runes[i]);
669
+ bytes->resize(p - &(*bytes)[0]);
670
+ bytes->shrink_to_fit();
671
+ }
672
+ }
673
+
674
+ // Determines whether regexp matches must be anchored
675
+ // with a fixed string prefix. If so, returns the prefix and
676
+ // the regexp that remains after the prefix. The prefix might
677
+ // be ASCII case-insensitive.
678
+ bool Regexp::RequiredPrefix(std::string* prefix, bool* foldcase,
679
+ Regexp** suffix) {
680
+ prefix->clear();
681
+ *foldcase = false;
682
+ *suffix = NULL;
683
+
684
+ // No need for a walker: the regexp must be of the form
685
+ // 1. some number of ^ anchors
686
+ // 2. a literal char or string
687
+ // 3. the rest
688
+ if (op_ != kRegexpConcat)
689
+ return false;
690
+ int i = 0;
691
+ while (i < nsub_ && sub()[i]->op_ == kRegexpBeginText)
692
+ i++;
693
+ if (i == 0 || i >= nsub_)
694
+ return false;
695
+ Regexp* re = sub()[i];
696
+ if (re->op_ != kRegexpLiteral &&
697
+ re->op_ != kRegexpLiteralString)
698
+ return false;
699
+ i++;
700
+ if (i < nsub_) {
701
+ for (int j = i; j < nsub_; j++)
702
+ sub()[j]->Incref();
703
+ *suffix = Concat(sub() + i, nsub_ - i, parse_flags());
704
+ } else {
705
+ *suffix = new Regexp(kRegexpEmptyMatch, parse_flags());
706
+ }
707
+
708
+ bool latin1 = (re->parse_flags() & Latin1) != 0;
709
+ Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_;
710
+ int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_;
711
+ ConvertRunesToBytes(latin1, runes, nrunes, prefix);
712
+ *foldcase = (re->parse_flags() & FoldCase) != 0;
713
+ return true;
714
+ }
715
+
716
+ // Determines whether regexp matches must be unanchored
717
+ // with a fixed string prefix. If so, returns the prefix.
718
+ // The prefix might be ASCII case-insensitive.
719
+ bool Regexp::RequiredPrefixForAccel(std::string* prefix, bool* foldcase) {
720
+ prefix->clear();
721
+ *foldcase = false;
722
+
723
+ // No need for a walker: the regexp must either begin with or be
724
+ // a literal char or string.
725
+ Regexp* re = op_ == kRegexpConcat && nsub_ > 0 ? sub()[0] : this;
726
+ if (re->op_ != kRegexpLiteral &&
727
+ re->op_ != kRegexpLiteralString)
728
+ return false;
729
+
730
+ bool latin1 = (re->parse_flags() & Latin1) != 0;
731
+ Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_;
732
+ int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_;
733
+ ConvertRunesToBytes(latin1, runes, nrunes, prefix);
734
+ *foldcase = (re->parse_flags() & FoldCase) != 0;
735
+ return true;
736
+ }
737
+
738
+ // Character class builder is a balanced binary tree (STL set)
739
+ // containing non-overlapping, non-abutting RuneRanges.
740
+ // The less-than operator used in the tree treats two
741
+ // ranges as equal if they overlap at all, so that
742
+ // lookups for a particular Rune are possible.
743
+
744
+ CharClassBuilder::CharClassBuilder() {
745
+ nrunes_ = 0;
746
+ upper_ = 0;
747
+ lower_ = 0;
748
+ }
749
+
750
+ // Add lo-hi to the class; return whether class got bigger.
751
+ bool CharClassBuilder::AddRange(Rune lo, Rune hi) {
752
+ if (hi < lo)
753
+ return false;
754
+
755
+ if (lo <= 'z' && hi >= 'A') {
756
+ // Overlaps some alpha, maybe not all.
757
+ // Update bitmaps telling which ASCII letters are in the set.
758
+ Rune lo1 = std::max<Rune>(lo, 'A');
759
+ Rune hi1 = std::min<Rune>(hi, 'Z');
760
+ if (lo1 <= hi1)
761
+ upper_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'A');
762
+
763
+ lo1 = std::max<Rune>(lo, 'a');
764
+ hi1 = std::min<Rune>(hi, 'z');
765
+ if (lo1 <= hi1)
766
+ lower_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'a');
767
+ }
768
+
769
+ { // Check whether lo, hi is already in the class.
770
+ iterator it = ranges_.find(RuneRange(lo, lo));
771
+ if (it != end() && it->lo <= lo && hi <= it->hi)
772
+ return false;
773
+ }
774
+
775
+ // Look for a range abutting lo on the left.
776
+ // If it exists, take it out and increase our range.
777
+ if (lo > 0) {
778
+ iterator it = ranges_.find(RuneRange(lo-1, lo-1));
779
+ if (it != end()) {
780
+ lo = it->lo;
781
+ if (it->hi > hi)
782
+ hi = it->hi;
783
+ nrunes_ -= it->hi - it->lo + 1;
784
+ ranges_.erase(it);
785
+ }
786
+ }
787
+
788
+ // Look for a range abutting hi on the right.
789
+ // If it exists, take it out and increase our range.
790
+ if (hi < Runemax) {
791
+ iterator it = ranges_.find(RuneRange(hi+1, hi+1));
792
+ if (it != end()) {
793
+ hi = it->hi;
794
+ nrunes_ -= it->hi - it->lo + 1;
795
+ ranges_.erase(it);
796
+ }
797
+ }
798
+
799
+ // Look for ranges between lo and hi. Take them out.
800
+ // This is only safe because the set has no overlapping ranges.
801
+ // We've already removed any ranges abutting lo and hi, so
802
+ // any that overlap [lo, hi] must be contained within it.
803
+ for (;;) {
804
+ iterator it = ranges_.find(RuneRange(lo, hi));
805
+ if (it == end())
806
+ break;
807
+ nrunes_ -= it->hi - it->lo + 1;
808
+ ranges_.erase(it);
809
+ }
810
+
811
+ // Finally, add [lo, hi].
812
+ nrunes_ += hi - lo + 1;
813
+ ranges_.insert(RuneRange(lo, hi));
814
+ return true;
815
+ }
816
+
817
+ void CharClassBuilder::AddCharClass(CharClassBuilder *cc) {
818
+ for (iterator it = cc->begin(); it != cc->end(); ++it)
819
+ AddRange(it->lo, it->hi);
820
+ }
821
+
822
+ bool CharClassBuilder::Contains(Rune r) {
823
+ return ranges_.find(RuneRange(r, r)) != end();
824
+ }
825
+
826
+ // Does the character class behave the same on A-Z as on a-z?
827
+ bool CharClassBuilder::FoldsASCII() {
828
+ return ((upper_ ^ lower_) & AlphaMask) == 0;
829
+ }
830
+
831
+ CharClassBuilder* CharClassBuilder::Copy() {
832
+ CharClassBuilder* cc = new CharClassBuilder;
833
+ for (iterator it = begin(); it != end(); ++it)
834
+ cc->ranges_.insert(RuneRange(it->lo, it->hi));
835
+ cc->upper_ = upper_;
836
+ cc->lower_ = lower_;
837
+ cc->nrunes_ = nrunes_;
838
+ return cc;
839
+ }
840
+
841
+
842
+
843
+ void CharClassBuilder::RemoveAbove(Rune r) {
844
+ if (r >= Runemax)
845
+ return;
846
+
847
+ if (r < 'z') {
848
+ if (r < 'a')
849
+ lower_ = 0;
850
+ else
851
+ lower_ &= AlphaMask >> ('z' - r);
852
+ }
853
+
854
+ if (r < 'Z') {
855
+ if (r < 'A')
856
+ upper_ = 0;
857
+ else
858
+ upper_ &= AlphaMask >> ('Z' - r);
859
+ }
860
+
861
+ for (;;) {
862
+
863
+ iterator it = ranges_.find(RuneRange(r + 1, Runemax));
864
+ if (it == end())
865
+ break;
866
+ RuneRange rr = *it;
867
+ ranges_.erase(it);
868
+ nrunes_ -= rr.hi - rr.lo + 1;
869
+ if (rr.lo <= r) {
870
+ rr.hi = r;
871
+ ranges_.insert(rr);
872
+ nrunes_ += rr.hi - rr.lo + 1;
873
+ }
874
+ }
875
+ }
876
+
877
+ void CharClassBuilder::Negate() {
878
+ // Build up negation and then copy in.
879
+ // Could edit ranges in place, but C++ won't let me.
880
+ std::vector<RuneRange> v;
881
+ v.reserve(ranges_.size() + 1);
882
+
883
+ // In negation, first range begins at 0, unless
884
+ // the current class begins at 0.
885
+ iterator it = begin();
886
+ if (it == end()) {
887
+ v.push_back(RuneRange(0, Runemax));
888
+ } else {
889
+ int nextlo = 0;
890
+ if (it->lo == 0) {
891
+ nextlo = it->hi + 1;
892
+ ++it;
893
+ }
894
+ for (; it != end(); ++it) {
895
+ v.push_back(RuneRange(nextlo, it->lo - 1));
896
+ nextlo = it->hi + 1;
897
+ }
898
+ if (nextlo <= Runemax)
899
+ v.push_back(RuneRange(nextlo, Runemax));
900
+ }
901
+
902
+ ranges_.clear();
903
+ for (size_t i = 0; i < v.size(); i++)
904
+ ranges_.insert(v[i]);
905
+
906
+ upper_ = AlphaMask & ~upper_;
907
+ lower_ = AlphaMask & ~lower_;
908
+ nrunes_ = Runemax+1 - nrunes_;
909
+ }
910
+
911
+ // Character class is a sorted list of ranges.
912
+ // The ranges are allocated in the same block as the header,
913
+ // necessitating a special allocator and Delete method.
914
+
915
+ CharClass* CharClass::New(int maxranges) {
916
+ CharClass* cc;
917
+ uint8_t* data = new uint8_t[sizeof *cc + maxranges*sizeof cc->ranges_[0]];
918
+ cc = reinterpret_cast<CharClass*>(data);
919
+ cc->ranges_ = reinterpret_cast<RuneRange*>(data + sizeof *cc);
920
+ cc->nranges_ = 0;
921
+ cc->folds_ascii_ = false;
922
+ cc->nrunes_ = 0;
923
+ return cc;
924
+ }
925
+
926
+ void CharClass::Delete() {
927
+ uint8_t* data = reinterpret_cast<uint8_t*>(this);
928
+ delete[] data;
929
+ }
930
+
931
+ CharClass* CharClass::Negate() {
932
+ CharClass* cc = CharClass::New(nranges_+1);
933
+ cc->folds_ascii_ = folds_ascii_;
934
+ cc->nrunes_ = Runemax + 1 - nrunes_;
935
+ int n = 0;
936
+ int nextlo = 0;
937
+ for (CharClass::iterator it = begin(); it != end(); ++it) {
938
+ if (it->lo == nextlo) {
939
+ nextlo = it->hi + 1;
940
+ } else {
941
+ cc->ranges_[n++] = RuneRange(nextlo, it->lo - 1);
942
+ nextlo = it->hi + 1;
943
+ }
944
+ }
945
+ if (nextlo <= Runemax)
946
+ cc->ranges_[n++] = RuneRange(nextlo, Runemax);
947
+ cc->nranges_ = n;
948
+ return cc;
949
+ }
950
+
951
+ bool CharClass::Contains(Rune r) {
952
+ RuneRange* rr = ranges_;
953
+ int n = nranges_;
954
+ while (n > 0) {
955
+ int m = n/2;
956
+ if (rr[m].hi < r) {
957
+ rr += m+1;
958
+ n -= m+1;
959
+ } else if (r < rr[m].lo) {
960
+ n = m;
961
+ } else { // rr[m].lo <= r && r <= rr[m].hi
962
+ return true;
963
+ }
964
+ }
965
+ return false;
966
+ }
967
+
968
+ CharClass* CharClassBuilder::GetCharClass() {
969
+ CharClass* cc = CharClass::New(static_cast<int>(ranges_.size()));
970
+ int n = 0;
971
+ for (iterator it = begin(); it != end(); ++it)
972
+ cc->ranges_[n++] = *it;
973
+ cc->nranges_ = n;
974
+ DCHECK_LE(n, static_cast<int>(ranges_.size()));
975
+ cc->nrunes_ = nrunes_;
976
+ cc->folds_ascii_ = FoldsASCII();
977
+ return cc;
978
+ }
979
+
980
+ } // namespace re2