grpc 1.31.0.pre1 → 1.31.0.pre2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of grpc might be problematic. Click here for more details.

Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/Makefile +2 -2
  3. data/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc +3 -4
  4. data/src/core/ext/filters/client_channel/lb_policy/xds/xds_routing.cc +5 -4
  5. data/src/ruby/lib/grpc/version.rb +1 -1
  6. data/third_party/re2/re2/bitmap256.h +117 -0
  7. data/third_party/re2/re2/bitstate.cc +385 -0
  8. data/third_party/re2/re2/compile.cc +1279 -0
  9. data/third_party/re2/re2/dfa.cc +2130 -0
  10. data/third_party/re2/re2/filtered_re2.cc +121 -0
  11. data/third_party/re2/re2/filtered_re2.h +109 -0
  12. data/third_party/re2/re2/mimics_pcre.cc +197 -0
  13. data/third_party/re2/re2/nfa.cc +713 -0
  14. data/third_party/re2/re2/onepass.cc +623 -0
  15. data/third_party/re2/re2/parse.cc +2464 -0
  16. data/third_party/re2/re2/perl_groups.cc +119 -0
  17. data/third_party/re2/re2/pod_array.h +55 -0
  18. data/third_party/re2/re2/prefilter.cc +710 -0
  19. data/third_party/re2/re2/prefilter.h +108 -0
  20. data/third_party/re2/re2/prefilter_tree.cc +407 -0
  21. data/third_party/re2/re2/prefilter_tree.h +139 -0
  22. data/third_party/re2/re2/prog.cc +988 -0
  23. data/third_party/re2/re2/prog.h +436 -0
  24. data/third_party/re2/re2/re2.cc +1362 -0
  25. data/third_party/re2/re2/re2.h +1002 -0
  26. data/third_party/re2/re2/regexp.cc +980 -0
  27. data/third_party/re2/re2/regexp.h +659 -0
  28. data/third_party/re2/re2/set.cc +154 -0
  29. data/third_party/re2/re2/set.h +80 -0
  30. data/third_party/re2/re2/simplify.cc +657 -0
  31. data/third_party/re2/re2/sparse_array.h +392 -0
  32. data/third_party/re2/re2/sparse_set.h +264 -0
  33. data/third_party/re2/re2/stringpiece.cc +65 -0
  34. data/third_party/re2/re2/stringpiece.h +210 -0
  35. data/third_party/re2/re2/tostring.cc +351 -0
  36. data/third_party/re2/re2/unicode_casefold.cc +582 -0
  37. data/third_party/re2/re2/unicode_casefold.h +78 -0
  38. data/third_party/re2/re2/unicode_groups.cc +6269 -0
  39. data/third_party/re2/re2/unicode_groups.h +67 -0
  40. data/third_party/re2/re2/walker-inl.h +246 -0
  41. data/third_party/re2/util/benchmark.h +156 -0
  42. data/third_party/re2/util/flags.h +26 -0
  43. data/third_party/re2/util/logging.h +109 -0
  44. data/third_party/re2/util/malloc_counter.h +19 -0
  45. data/third_party/re2/util/mix.h +41 -0
  46. data/third_party/re2/util/mutex.h +148 -0
  47. data/third_party/re2/util/pcre.cc +1025 -0
  48. data/third_party/re2/util/pcre.h +681 -0
  49. data/third_party/re2/util/rune.cc +260 -0
  50. data/third_party/re2/util/strutil.cc +149 -0
  51. data/third_party/re2/util/strutil.h +21 -0
  52. data/third_party/re2/util/test.h +50 -0
  53. data/third_party/re2/util/utf.h +44 -0
  54. data/third_party/re2/util/util.h +42 -0
  55. metadata +78 -29
@@ -0,0 +1,26 @@
1
+ // Copyright 2009 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #ifndef UTIL_FLAGS_H_
6
+ #define UTIL_FLAGS_H_
7
+
8
+ // Simplified version of Google's command line flags.
9
+ // Does not support parsing the command line.
10
+ // If you want to do that, see
11
+ // https://gflags.github.io/gflags/
12
+
13
// Defines the variable re2::FLAGS_<name> of the given type, initialised to
// deflt. The desc argument is accepted but does not appear in the expansion.
#define DEFINE_FLAG(type, name, deflt, desc) \
  namespace re2 {                            \
  type FLAGS_##name = deflt;                 \
  }

// Declares (as extern) the variable re2::FLAGS_<name> of the given type.
#define DECLARE_FLAG(type, name) \
  namespace re2 {                \
  extern type FLAGS_##name;      \
  }

namespace re2 {

// Returns a copy of the value held by a flag variable.
template <typename FlagType>
FlagType GetFlag(const FlagType& flag) {
  return flag;
}

}  // namespace re2
25
+
26
+ #endif // UTIL_FLAGS_H_
@@ -0,0 +1,109 @@
1
+ // Copyright 2009 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #ifndef UTIL_LOGGING_H_
6
+ #define UTIL_LOGGING_H_
7
+
8
+ // Simplified version of Google's logging.
9
+
10
+ #include <assert.h>
11
+ #include <stdio.h>
12
+ #include <stdlib.h>
13
+ #include <ostream>
14
+ #include <sstream>
15
+
16
+ #include "util/util.h"
17
+
18
// Debug-only checks. Each macro forwards to assert(), so whether it is
// evaluated follows the usual assert()/NDEBUG rules.
#define DCHECK(condition) assert(condition)
#define DCHECK_EQ(lhs, rhs) assert((lhs) == (rhs))
#define DCHECK_NE(lhs, rhs) assert((lhs) != (rhs))
#define DCHECK_LE(lhs, rhs) assert((lhs) <= (rhs))
#define DCHECK_LT(lhs, rhs) assert((lhs) < (rhs))
#define DCHECK_GE(lhs, rhs) assert((lhs) >= (rhs))
#define DCHECK_GT(lhs, rhs) assert((lhs) > (rhs))
26
+
27
// Always-on checks. When the condition is false, a LogMessageFatal temporary
// is created and "Check failed: <condition text>" is streamed into it; the
// caller may append further text with <<. The if/else shape keeps the macro
// usable as a single statement.
#define CHECK(x) \
  if (x) {       \
  } else         \
    LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #x
#define CHECK_LT(x, y) CHECK((x) < (y))
#define CHECK_GT(x, y) CHECK((x) > (y))
#define CHECK_LE(x, y) CHECK((x) <= (y))
#define CHECK_GE(x, y) CHECK((x) >= (y))
#define CHECK_EQ(x, y) CHECK((x) == (y))
#define CHECK_NE(x, y) CHECK((x) != (y))
35
+
36
// Severity selectors used by LOG(). INFO, WARNING and ERROR all create a
// plain LogMessage; FATAL and QFATAL create a LogMessageFatal.
#define LOG_INFO LogMessage(__FILE__, __LINE__)
#define LOG_WARNING LogMessage(__FILE__, __LINE__)
#define LOG_ERROR LogMessage(__FILE__, __LINE__)
#define LOG_FATAL LogMessageFatal(__FILE__, __LINE__)
#define LOG_QFATAL LOG_FATAL

// It seems that one of the Windows header files defines ERROR as 0, in which
// case LOG(ERROR) pastes to LOG_0; give that name a definition too.
#ifdef _WIN32
#define LOG_0 LOG_INFO
#endif

// DFATAL is fatal unless NDEBUG is defined, in which case it logs like ERROR.
#ifdef NDEBUG
#define LOG_DFATAL LOG_ERROR
#else
#define LOG_DFATAL LOG_FATAL
#endif

// LOG(severity) << ... streams into the message object chosen above.
#define LOG(severity) LOG_ ## severity.stream()

// VLOG(x) << ... streams to an INFO message only when x is not greater than 0.
#define VLOG(x) \
  if ((x) > 0) { \
  } else         \
    LOG_INFO.stream()
56
+
57
// Collects one log line in an in-memory stream and writes it to stderr when
// flushed, either explicitly through Flush() or from the destructor if Flush()
// has not been called yet. Not copyable.
class LogMessage {
 public:
  // Begins the line with the prefix "<file>:<line>: ".
  LogMessage(const char* file, int line) : flushed_(false) {
    str_ << file << ":" << line << ": ";
  }

  LogMessage(const LogMessage&) = delete;
  LogMessage& operator=(const LogMessage&) = delete;

  ~LogMessage() {
    if (flushed_) return;
    Flush();
  }

  // Appends a newline, writes the accumulated text to stderr and marks the
  // message as flushed.
  void Flush() {
    str_ << "\n";
    const std::string text = str_.str();
    const size_t length = text.size();
    // The result is compared only to keep gcc from warning about an unused
    // return value; a short write is deliberately ignored.
    if (fwrite(text.data(), 1, length, stderr) < length) {
    }
    flushed_ = true;
  }

  // Stream that callers append the message body to.
  std::ostream& stream() { return str_; }

 private:
  bool flushed_;
  std::ostringstream str_;
};
84
+
85
+ // Silence "destructor never returns" warning for ~LogMessageFatal().
86
+ // Since this is a header file, push and then pop to limit the scope.
87
+ #ifdef _MSC_VER
88
+ #pragma warning(push)
89
+ #pragma warning(disable: 4722)
90
+ #endif
91
+
92
+ class LogMessageFatal : public LogMessage {
93
+ public:
94
+ LogMessageFatal(const char* file, int line)
95
+ : LogMessage(file, line) {}
96
+ ATTRIBUTE_NORETURN ~LogMessageFatal() {
97
+ Flush();
98
+ abort();
99
+ }
100
+ private:
101
+ LogMessageFatal(const LogMessageFatal&) = delete;
102
+ LogMessageFatal& operator=(const LogMessageFatal&) = delete;
103
+ };
104
+
105
+ #ifdef _MSC_VER
106
+ #pragma warning(pop)
107
+ #endif
108
+
109
+ #endif // UTIL_LOGGING_H_
@@ -0,0 +1,19 @@
1
+ // Copyright 2009 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #ifndef UTIL_MALLOC_COUNTER_H_
6
+ #define UTIL_MALLOC_COUNTER_H_
7
+
8
namespace testing {

// Stand-in for a heap-usage counter. Every query reports zero growth and
// Reset() does nothing; the constructor argument is ignored.
class MallocCounter {
 public:
  // The parameter is intentionally unnamed: it is accepted for call-site
  // compatibility only, and leaving it unnamed avoids unused-parameter
  // warnings. The constructor stays implicit so existing callers keep working.
  MallocCounter(int) {}

  static const int THIS_THREAD_ONLY = 0;

  // Always 0.
  long long HeapGrowth() const { return 0; }
  // Always 0.
  long long PeakHeapGrowth() const { return 0; }
  // No-op.
  void Reset() {}
};

}  // namespace testing
18
+
19
+ #endif // UTIL_MALLOC_COUNTER_H_
@@ -0,0 +1,41 @@
1
+ // Copyright 2016 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #ifndef UTIL_MIX_H_
6
+ #define UTIL_MIX_H_
7
+
8
+ #include <stddef.h>
9
+ #include <limits>
10
+
11
+ namespace re2 {
12
+
13
+ // Silence "truncation of constant value" warning for kMul in 32-bit mode.
14
+ // Since this is a header file, push and then pop to limit the scope.
15
+ #ifdef _MSC_VER
16
+ #pragma warning(push)
17
+ #pragma warning(disable: 4309)
18
+ #endif
19
+
20
// Incremental hash combiner over size_t values.
class HashMix {
 public:
  // Starts from the state 1.
  HashMix() : hash_(1) {}
  // Starts from the state val + 83.
  explicit HashMix(size_t val) : hash_(val + 83) {}

  // Folds val into the state: multiply by a fixed constant, rotate the
  // product left by 19 bits, then add val.
  void Mix(size_t val) {
    static const size_t kMul = static_cast<size_t>(0xdc3eb94af8ab4c93ULL);
    const int kWordBits = std::numeric_limits<size_t>::digits;
    const size_t product = hash_ * kMul;
    const size_t rotated = (product << 19) | (product >> (kWordBits - 19));
    hash_ = rotated + val;
  }

  // Current hash state.
  size_t get() const { return hash_; }

 private:
  size_t hash_;
};
34
+
35
+ #ifdef _MSC_VER
36
+ #pragma warning(pop)
37
+ #endif
38
+
39
+ } // namespace re2
40
+
41
+ #endif // UTIL_MIX_H_
@@ -0,0 +1,148 @@
1
+ // Copyright 2007 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #ifndef UTIL_MUTEX_H_
6
+ #define UTIL_MUTEX_H_
7
+
8
+ /*
9
+ * A simple mutex wrapper, supporting locks and read-write locks.
10
+ * You should assume the locks are *not* re-entrant.
11
+ */
12
+
13
// Choose the primitive behind MutexType:
//   * _WIN32 with WINVER >= 0x0600          -> SRWLOCK
//   * non-Windows with _POSIX_READER_WRITER_LOCKS > 0 -> pthread_rwlock_t
//   * anything else                          -> std::mutex
#ifdef _WIN32
// Requires Windows Vista or Windows Server 2008 at minimum.
#if defined(WINVER) && WINVER >= 0x0600
#define MUTEX_IS_WIN32_SRWLOCK
#endif
#else
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif
#include <unistd.h>
#if defined(_POSIX_READER_WRITER_LOCKS) && _POSIX_READER_WRITER_LOCKS > 0
#define MUTEX_IS_PTHREAD_RWLOCK
#endif
#endif

#if defined(MUTEX_IS_WIN32_SRWLOCK)
#include <windows.h>
using MutexType = SRWLOCK;
#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
#include <pthread.h>
#include <stdlib.h>
using MutexType = pthread_rwlock_t;
#else
#include <mutex>
using MutexType = std::mutex;
#endif
39
+
40
+ namespace re2 {
41
+
42
+ class Mutex {
43
+ public:
44
+ inline Mutex();
45
+ inline ~Mutex();
46
+ inline void Lock(); // Block if needed until free then acquire exclusively
47
+ inline void Unlock(); // Release a lock acquired via Lock()
48
+ // Note that on systems that don't support read-write locks, these may
49
+ // be implemented as synonyms to Lock() and Unlock(). So you can use
50
+ // these for efficiency, but don't use them anyplace where being able
51
+ // to do shared reads is necessary to avoid deadlock.
52
+ inline void ReaderLock(); // Block until free or shared then acquire a share
53
+ inline void ReaderUnlock(); // Release a read share of this Mutex
54
+ inline void WriterLock() { Lock(); } // Acquire an exclusive lock
55
+ inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
56
+
57
+ private:
58
+ MutexType mutex_;
59
+
60
+ // Catch the error of writing Mutex when intending MutexLock.
61
+ Mutex(Mutex *ignored);
62
+
63
+ Mutex(const Mutex&) = delete;
64
+ Mutex& operator=(const Mutex&) = delete;
65
+ };
66
+
67
+ #if defined(MUTEX_IS_WIN32_SRWLOCK)
68
+
69
+ Mutex::Mutex() { InitializeSRWLock(&mutex_); }
70
+ Mutex::~Mutex() { }
71
+ void Mutex::Lock() { AcquireSRWLockExclusive(&mutex_); }
72
+ void Mutex::Unlock() { ReleaseSRWLockExclusive(&mutex_); }
73
+ void Mutex::ReaderLock() { AcquireSRWLockShared(&mutex_); }
74
+ void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
75
+
76
+ #elif defined(MUTEX_IS_PTHREAD_RWLOCK)
77
+
78
+ #define SAFE_PTHREAD(fncall) \
79
+ do { \
80
+ if ((fncall) != 0) abort(); \
81
+ } while (0)
82
+
83
+ Mutex::Mutex() { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); }
84
+ Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); }
85
+ void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); }
86
+ void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
87
+ void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); }
88
+ void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
89
+
90
+ #undef SAFE_PTHREAD
91
+
92
+ #else
93
+
94
+ Mutex::Mutex() { }
95
+ Mutex::~Mutex() { }
96
+ void Mutex::Lock() { mutex_.lock(); }
97
+ void Mutex::Unlock() { mutex_.unlock(); }
98
+ void Mutex::ReaderLock() { Lock(); } // C++11 doesn't have std::shared_mutex.
99
+ void Mutex::ReaderUnlock() { Unlock(); }
100
+
101
+ #endif
102
+
103
+ // --------------------------------------------------------------------------
104
+ // Some helper classes
105
+
106
+ // MutexLock(mu) acquires mu when constructed and releases it when destroyed.
107
+ class MutexLock {
108
+ public:
109
+ explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); }
110
+ ~MutexLock() { mu_->Unlock(); }
111
+ private:
112
+ Mutex * const mu_;
113
+
114
+ MutexLock(const MutexLock&) = delete;
115
+ MutexLock& operator=(const MutexLock&) = delete;
116
+ };
117
+
118
+ // ReaderMutexLock and WriterMutexLock do the same, for rwlocks
119
+ class ReaderMutexLock {
120
+ public:
121
+ explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); }
122
+ ~ReaderMutexLock() { mu_->ReaderUnlock(); }
123
+ private:
124
+ Mutex * const mu_;
125
+
126
+ ReaderMutexLock(const ReaderMutexLock&) = delete;
127
+ ReaderMutexLock& operator=(const ReaderMutexLock&) = delete;
128
+ };
129
+
130
+ class WriterMutexLock {
131
+ public:
132
+ explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); }
133
+ ~WriterMutexLock() { mu_->WriterUnlock(); }
134
+ private:
135
+ Mutex * const mu_;
136
+
137
+ WriterMutexLock(const WriterMutexLock&) = delete;
138
+ WriterMutexLock& operator=(const WriterMutexLock&) = delete;
139
+ };
140
+
141
// Catch bug where variable name is omitted, e.g. MutexLock (&mu);
// A function-like use of any of these class names after this point expands
// to a failing static_assert instead of creating an unnamed temporary.
#define MutexLock(x) \
  static_assert(false, "MutexLock declaration missing variable name")
#define ReaderMutexLock(x) \
  static_assert(false, "ReaderMutexLock declaration missing variable name")
#define WriterMutexLock(x) \
  static_assert(false, "WriterMutexLock declaration missing variable name")
145
+
146
+ } // namespace re2
147
+
148
+ #endif // UTIL_MUTEX_H_
@@ -0,0 +1,1025 @@
1
+ // Copyright 2003-2009 Google Inc. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ // This is a variant of PCRE's pcrecpp.cc, originally written at Google.
6
+ // The main changes are the addition of the HitLimit method and
7
+ // compilation as PCRE in namespace re2.
8
+
9
+ #include <assert.h>
10
+ #include <ctype.h>
11
+ #include <errno.h>
12
+ #include <stdlib.h>
13
+ #include <string.h>
14
+ #include <limits>
15
+ #include <string>
16
+ #include <utility>
17
+
18
+ #include "util/util.h"
19
+ #include "util/flags.h"
20
+ #include "util/logging.h"
21
+ #include "util/pcre.h"
22
+ #include "util/strutil.h"
23
+
24
+ // Silence warnings about the wacky formatting in the operator() functions.
25
+ #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
26
+ #pragma GCC diagnostic ignored "-Wmisleading-indentation"
27
+ #endif
28
+
29
+ #define PCREPORT(level) LOG(level)
30
+
31
+ // Default PCRE limits.
32
+ // Defaults chosen to allow a plausible amount of CPU and
33
+ // not exceed main thread stacks. Note that other threads
34
+ // often have smaller stacks, and therefore tightening
35
+ // regexp_stack_limit may frequently be necessary.
36
+ DEFINE_FLAG(int, regexp_stack_limit, 256 << 10,
37
+ "default PCRE stack limit (bytes)");
38
+ DEFINE_FLAG(int, regexp_match_limit, 1000000,
39
+ "default PCRE match limit (function calls)");
40
+
41
+ #ifndef USEPCRE
42
+
43
+ // Fake just enough of the PCRE API to allow this file to build. :)
44
+
45
+ struct pcre_extra {
46
+ int flags;
47
+ int match_limit;
48
+ int match_limit_recursion;
49
+ };
50
+
51
+ #define PCRE_EXTRA_MATCH_LIMIT 0
52
+ #define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0
53
+ #define PCRE_ANCHORED 0
54
+ #define PCRE_NOTEMPTY 0
55
+ #define PCRE_ERROR_NOMATCH 1
56
+ #define PCRE_ERROR_MATCHLIMIT 2
57
+ #define PCRE_ERROR_RECURSIONLIMIT 3
58
+ #define PCRE_INFO_CAPTURECOUNT 0
59
+
60
+ void pcre_free(void*) {
61
+ }
62
+
63
+ pcre* pcre_compile(const char*, int, const char**, int*, const unsigned char*) {
64
+ return NULL;
65
+ }
66
+
67
+ int pcre_exec(const pcre*, const pcre_extra*, const char*, int, int, int, int*, int) {
68
+ return 0;
69
+ }
70
+
71
+ int pcre_fullinfo(const pcre*, const pcre_extra*, int, void*) {
72
+ return 0;
73
+ }
74
+
75
+ #endif
76
+
77
+ namespace re2 {
78
+
79
+ // Maximum number of args we can set
80
+ static const int kMaxArgs = 16;
81
+ static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace
82
+
83
+ // Approximate size of a recursive invocation of PCRE's
84
+ // internal "match()" frame. This varies depending on the
85
+ // compiler and architecture, of course, so the constant is
86
+ // just a conservative estimate. To find the exact number,
87
+ // run regexp_unittest with --regexp_stack_limit=0 under
88
+ // a debugger and look at the frames when it crashes.
89
+ // The exact frame size was 656 in production on 2008/02/03.
90
+ static const int kPCREFrameSize = 700;
91
+
92
+ // Special name for missing C++ arguments.
93
+ PCRE::Arg PCRE::no_more_args((void*)NULL);
94
+
95
+ const PCRE::PartialMatchFunctor PCRE::PartialMatch = { };
96
+ const PCRE::FullMatchFunctor PCRE::FullMatch = { } ;
97
+ const PCRE::ConsumeFunctor PCRE::Consume = { };
98
+ const PCRE::FindAndConsumeFunctor PCRE::FindAndConsume = { };
99
+
100
+ // If a regular expression has no error, its error_ field points here
101
+ static const std::string empty_string;
102
+
103
+ void PCRE::Init(const char* pattern, Option options, int match_limit,
104
+ int stack_limit, bool report_errors) {
105
+ pattern_ = pattern;
106
+ options_ = options;
107
+ match_limit_ = match_limit;
108
+ stack_limit_ = stack_limit;
109
+ hit_limit_ = false;
110
+ error_ = &empty_string;
111
+ report_errors_ = report_errors;
112
+ re_full_ = NULL;
113
+ re_partial_ = NULL;
114
+
115
+ if (options & ~(EnabledCompileOptions | EnabledExecOptions)) {
116
+ error_ = new std::string("illegal regexp option");
117
+ PCREPORT(ERROR)
118
+ << "Error compiling '" << pattern << "': illegal regexp option";
119
+ } else {
120
+ re_partial_ = Compile(UNANCHORED);
121
+ if (re_partial_ != NULL) {
122
+ re_full_ = Compile(ANCHOR_BOTH);
123
+ }
124
+ }
125
+ }
126
+
127
+ PCRE::PCRE(const char* pattern) {
128
+ Init(pattern, None, 0, 0, true);
129
+ }
130
+ PCRE::PCRE(const char* pattern, Option option) {
131
+ Init(pattern, option, 0, 0, true);
132
+ }
133
+ PCRE::PCRE(const std::string& pattern) {
134
+ Init(pattern.c_str(), None, 0, 0, true);
135
+ }
136
+ PCRE::PCRE(const std::string& pattern, Option option) {
137
+ Init(pattern.c_str(), option, 0, 0, true);
138
+ }
139
+ PCRE::PCRE(const std::string& pattern, const PCRE_Options& re_option) {
140
+ Init(pattern.c_str(), re_option.option(), re_option.match_limit(),
141
+ re_option.stack_limit(), re_option.report_errors());
142
+ }
143
+
144
+ PCRE::PCRE(const char *pattern, const PCRE_Options& re_option) {
145
+ Init(pattern, re_option.option(), re_option.match_limit(),
146
+ re_option.stack_limit(), re_option.report_errors());
147
+ }
148
+
149
+ PCRE::~PCRE() {
150
+ if (re_full_ != NULL) pcre_free(re_full_);
151
+ if (re_partial_ != NULL) pcre_free(re_partial_);
152
+ if (error_ != &empty_string) delete error_;
153
+ }
154
+
155
+ pcre* PCRE::Compile(Anchor anchor) {
156
+ // Special treatment for anchoring. This is needed because at
157
+ // runtime pcre only provides an option for anchoring at the
158
+ // beginning of a string.
159
+ //
160
+ // There are three types of anchoring we want:
161
+ // UNANCHORED Compile the original pattern, and use
162
+ // a pcre unanchored match.
163
+ // ANCHOR_START Compile the original pattern, and use
164
+ // a pcre anchored match.
165
+ // ANCHOR_BOTH Tack a "\z" to the end of the original pattern
166
+ // and use a pcre anchored match.
167
+
168
+ const char* error = "";
169
+ int eoffset;
170
+ pcre* re;
171
+ if (anchor != ANCHOR_BOTH) {
172
+ re = pcre_compile(pattern_.c_str(),
173
+ (options_ & EnabledCompileOptions),
174
+ &error, &eoffset, NULL);
175
+ } else {
176
+ // Tack a '\z' at the end of PCRE. Parenthesize it first so that
177
+ // the '\z' applies to all top-level alternatives in the regexp.
178
+ std::string wrapped = "(?:"; // A non-counting grouping operator
179
+ wrapped += pattern_;
180
+ wrapped += ")\\z";
181
+ re = pcre_compile(wrapped.c_str(),
182
+ (options_ & EnabledCompileOptions),
183
+ &error, &eoffset, NULL);
184
+ }
185
+ if (re == NULL) {
186
+ if (error_ == &empty_string) error_ = new std::string(error);
187
+ PCREPORT(ERROR) << "Error compiling '" << pattern_ << "': " << error;
188
+ }
189
+ return re;
190
+ }
191
+
192
+ /***** Convenience interfaces *****/
193
+
194
+ bool PCRE::FullMatchFunctor::operator ()(const StringPiece& text,
195
+ const PCRE& re,
196
+ const Arg& a0,
197
+ const Arg& a1,
198
+ const Arg& a2,
199
+ const Arg& a3,
200
+ const Arg& a4,
201
+ const Arg& a5,
202
+ const Arg& a6,
203
+ const Arg& a7,
204
+ const Arg& a8,
205
+ const Arg& a9,
206
+ const Arg& a10,
207
+ const Arg& a11,
208
+ const Arg& a12,
209
+ const Arg& a13,
210
+ const Arg& a14,
211
+ const Arg& a15) const {
212
+ const Arg* args[kMaxArgs];
213
+ int n = 0;
214
+ if (&a0 == &no_more_args) goto done; args[n++] = &a0;
215
+ if (&a1 == &no_more_args) goto done; args[n++] = &a1;
216
+ if (&a2 == &no_more_args) goto done; args[n++] = &a2;
217
+ if (&a3 == &no_more_args) goto done; args[n++] = &a3;
218
+ if (&a4 == &no_more_args) goto done; args[n++] = &a4;
219
+ if (&a5 == &no_more_args) goto done; args[n++] = &a5;
220
+ if (&a6 == &no_more_args) goto done; args[n++] = &a6;
221
+ if (&a7 == &no_more_args) goto done; args[n++] = &a7;
222
+ if (&a8 == &no_more_args) goto done; args[n++] = &a8;
223
+ if (&a9 == &no_more_args) goto done; args[n++] = &a9;
224
+ if (&a10 == &no_more_args) goto done; args[n++] = &a10;
225
+ if (&a11 == &no_more_args) goto done; args[n++] = &a11;
226
+ if (&a12 == &no_more_args) goto done; args[n++] = &a12;
227
+ if (&a13 == &no_more_args) goto done; args[n++] = &a13;
228
+ if (&a14 == &no_more_args) goto done; args[n++] = &a14;
229
+ if (&a15 == &no_more_args) goto done; args[n++] = &a15;
230
+ done:
231
+
232
+ size_t consumed;
233
+ int vec[kVecSize] = {};
234
+ return re.DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
235
+ }
236
+
237
+ bool PCRE::PartialMatchFunctor::operator ()(const StringPiece& text,
238
+ const PCRE& re,
239
+ const Arg& a0,
240
+ const Arg& a1,
241
+ const Arg& a2,
242
+ const Arg& a3,
243
+ const Arg& a4,
244
+ const Arg& a5,
245
+ const Arg& a6,
246
+ const Arg& a7,
247
+ const Arg& a8,
248
+ const Arg& a9,
249
+ const Arg& a10,
250
+ const Arg& a11,
251
+ const Arg& a12,
252
+ const Arg& a13,
253
+ const Arg& a14,
254
+ const Arg& a15) const {
255
+ const Arg* args[kMaxArgs];
256
+ int n = 0;
257
+ if (&a0 == &no_more_args) goto done; args[n++] = &a0;
258
+ if (&a1 == &no_more_args) goto done; args[n++] = &a1;
259
+ if (&a2 == &no_more_args) goto done; args[n++] = &a2;
260
+ if (&a3 == &no_more_args) goto done; args[n++] = &a3;
261
+ if (&a4 == &no_more_args) goto done; args[n++] = &a4;
262
+ if (&a5 == &no_more_args) goto done; args[n++] = &a5;
263
+ if (&a6 == &no_more_args) goto done; args[n++] = &a6;
264
+ if (&a7 == &no_more_args) goto done; args[n++] = &a7;
265
+ if (&a8 == &no_more_args) goto done; args[n++] = &a8;
266
+ if (&a9 == &no_more_args) goto done; args[n++] = &a9;
267
+ if (&a10 == &no_more_args) goto done; args[n++] = &a10;
268
+ if (&a11 == &no_more_args) goto done; args[n++] = &a11;
269
+ if (&a12 == &no_more_args) goto done; args[n++] = &a12;
270
+ if (&a13 == &no_more_args) goto done; args[n++] = &a13;
271
+ if (&a14 == &no_more_args) goto done; args[n++] = &a14;
272
+ if (&a15 == &no_more_args) goto done; args[n++] = &a15;
273
+ done:
274
+
275
+ size_t consumed;
276
+ int vec[kVecSize] = {};
277
+ return re.DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
278
+ }
279
+
280
+ bool PCRE::ConsumeFunctor::operator ()(StringPiece* input,
281
+ const PCRE& pattern,
282
+ const Arg& a0,
283
+ const Arg& a1,
284
+ const Arg& a2,
285
+ const Arg& a3,
286
+ const Arg& a4,
287
+ const Arg& a5,
288
+ const Arg& a6,
289
+ const Arg& a7,
290
+ const Arg& a8,
291
+ const Arg& a9,
292
+ const Arg& a10,
293
+ const Arg& a11,
294
+ const Arg& a12,
295
+ const Arg& a13,
296
+ const Arg& a14,
297
+ const Arg& a15) const {
298
+ const Arg* args[kMaxArgs];
299
+ int n = 0;
300
+ if (&a0 == &no_more_args) goto done; args[n++] = &a0;
301
+ if (&a1 == &no_more_args) goto done; args[n++] = &a1;
302
+ if (&a2 == &no_more_args) goto done; args[n++] = &a2;
303
+ if (&a3 == &no_more_args) goto done; args[n++] = &a3;
304
+ if (&a4 == &no_more_args) goto done; args[n++] = &a4;
305
+ if (&a5 == &no_more_args) goto done; args[n++] = &a5;
306
+ if (&a6 == &no_more_args) goto done; args[n++] = &a6;
307
+ if (&a7 == &no_more_args) goto done; args[n++] = &a7;
308
+ if (&a8 == &no_more_args) goto done; args[n++] = &a8;
309
+ if (&a9 == &no_more_args) goto done; args[n++] = &a9;
310
+ if (&a10 == &no_more_args) goto done; args[n++] = &a10;
311
+ if (&a11 == &no_more_args) goto done; args[n++] = &a11;
312
+ if (&a12 == &no_more_args) goto done; args[n++] = &a12;
313
+ if (&a13 == &no_more_args) goto done; args[n++] = &a13;
314
+ if (&a14 == &no_more_args) goto done; args[n++] = &a14;
315
+ if (&a15 == &no_more_args) goto done; args[n++] = &a15;
316
+ done:
317
+
318
+ size_t consumed;
319
+ int vec[kVecSize] = {};
320
+ if (pattern.DoMatchImpl(*input, ANCHOR_START, &consumed,
321
+ args, n, vec, kVecSize)) {
322
+ input->remove_prefix(consumed);
323
+ return true;
324
+ } else {
325
+ return false;
326
+ }
327
+ }
328
+
329
+ bool PCRE::FindAndConsumeFunctor::operator ()(StringPiece* input,
330
+ const PCRE& pattern,
331
+ const Arg& a0,
332
+ const Arg& a1,
333
+ const Arg& a2,
334
+ const Arg& a3,
335
+ const Arg& a4,
336
+ const Arg& a5,
337
+ const Arg& a6,
338
+ const Arg& a7,
339
+ const Arg& a8,
340
+ const Arg& a9,
341
+ const Arg& a10,
342
+ const Arg& a11,
343
+ const Arg& a12,
344
+ const Arg& a13,
345
+ const Arg& a14,
346
+ const Arg& a15) const {
347
+ const Arg* args[kMaxArgs];
348
+ int n = 0;
349
+ if (&a0 == &no_more_args) goto done; args[n++] = &a0;
350
+ if (&a1 == &no_more_args) goto done; args[n++] = &a1;
351
+ if (&a2 == &no_more_args) goto done; args[n++] = &a2;
352
+ if (&a3 == &no_more_args) goto done; args[n++] = &a3;
353
+ if (&a4 == &no_more_args) goto done; args[n++] = &a4;
354
+ if (&a5 == &no_more_args) goto done; args[n++] = &a5;
355
+ if (&a6 == &no_more_args) goto done; args[n++] = &a6;
356
+ if (&a7 == &no_more_args) goto done; args[n++] = &a7;
357
+ if (&a8 == &no_more_args) goto done; args[n++] = &a8;
358
+ if (&a9 == &no_more_args) goto done; args[n++] = &a9;
359
+ if (&a10 == &no_more_args) goto done; args[n++] = &a10;
360
+ if (&a11 == &no_more_args) goto done; args[n++] = &a11;
361
+ if (&a12 == &no_more_args) goto done; args[n++] = &a12;
362
+ if (&a13 == &no_more_args) goto done; args[n++] = &a13;
363
+ if (&a14 == &no_more_args) goto done; args[n++] = &a14;
364
+ if (&a15 == &no_more_args) goto done; args[n++] = &a15;
365
+ done:
366
+
367
+ size_t consumed;
368
+ int vec[kVecSize] = {};
369
+ if (pattern.DoMatchImpl(*input, UNANCHORED, &consumed,
370
+ args, n, vec, kVecSize)) {
371
+ input->remove_prefix(consumed);
372
+ return true;
373
+ } else {
374
+ return false;
375
+ }
376
+ }
377
+
378
+ bool PCRE::Replace(std::string *str,
379
+ const PCRE& pattern,
380
+ const StringPiece& rewrite) {
381
+ int vec[kVecSize] = {};
382
+ int matches = pattern.TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize);
383
+ if (matches == 0)
384
+ return false;
385
+
386
+ std::string s;
387
+ if (!pattern.Rewrite(&s, rewrite, *str, vec, matches))
388
+ return false;
389
+
390
+ assert(vec[0] >= 0);
391
+ assert(vec[1] >= 0);
392
+ str->replace(vec[0], vec[1] - vec[0], s);
393
+ return true;
394
+ }
395
+
396
+ int PCRE::GlobalReplace(std::string *str,
397
+ const PCRE& pattern,
398
+ const StringPiece& rewrite) {
399
+ int count = 0;
400
+ int vec[kVecSize] = {};
401
+ std::string out;
402
+ size_t start = 0;
403
+ bool last_match_was_empty_string = false;
404
+
405
+ while (start <= str->size()) {
406
+ // If the previous match was for the empty string, we shouldn't
407
+ // just match again: we'll match in the same way and get an
408
+ // infinite loop. Instead, we do the match in a special way:
409
+ // anchored -- to force another try at the same position --
410
+ // and with a flag saying that this time, ignore empty matches.
411
+ // If this special match returns, that means there's a non-empty
412
+ // match at this position as well, and we can continue. If not,
413
+ // we do what perl does, and just advance by one.
414
+ // Notice that perl prints '@@@' for this;
415
+ // perl -le '$_ = "aa"; s/b*|aa/@/g; print'
416
+ int matches;
417
+ if (last_match_was_empty_string) {
418
+ matches = pattern.TryMatch(*str, start, ANCHOR_START, false,
419
+ vec, kVecSize);
420
+ if (matches <= 0) {
421
+ if (start < str->size())
422
+ out.push_back((*str)[start]);
423
+ start++;
424
+ last_match_was_empty_string = false;
425
+ continue;
426
+ }
427
+ } else {
428
+ matches = pattern.TryMatch(*str, start, UNANCHORED, true,
429
+ vec, kVecSize);
430
+ if (matches <= 0)
431
+ break;
432
+ }
433
+ size_t matchstart = vec[0], matchend = vec[1];
434
+ assert(matchstart >= start);
435
+ assert(matchend >= matchstart);
436
+
437
+ out.append(*str, start, matchstart - start);
438
+ pattern.Rewrite(&out, rewrite, *str, vec, matches);
439
+ start = matchend;
440
+ count++;
441
+ last_match_was_empty_string = (matchstart == matchend);
442
+ }
443
+
444
+ if (count == 0)
445
+ return 0;
446
+
447
+ if (start < str->size())
448
+ out.append(*str, start, str->size() - start);
449
+ using std::swap;
450
+ swap(out, *str);
451
+ return count;
452
+ }
453
+
454
+ bool PCRE::Extract(const StringPiece &text,
455
+ const PCRE& pattern,
456
+ const StringPiece &rewrite,
457
+ std::string *out) {
458
+ int vec[kVecSize] = {};
459
+ int matches = pattern.TryMatch(text, 0, UNANCHORED, true, vec, kVecSize);
460
+ if (matches == 0)
461
+ return false;
462
+ out->clear();
463
+ return pattern.Rewrite(out, rewrite, text, vec, matches);
464
+ }
465
+
466
+ std::string PCRE::QuoteMeta(const StringPiece& unquoted) {
467
+ std::string result;
468
+ result.reserve(unquoted.size() << 1);
469
+
470
+ // Escape any ascii character not in [A-Za-z_0-9].
471
+ //
472
+ // Note that it's legal to escape a character even if it has no
473
+ // special meaning in a regular expression -- so this function does
474
+ // that. (This also makes it identical to the perl function of the
475
+ // same name except for the null-character special case;
476
+ // see `perldoc -f quotemeta`.)
477
+ for (size_t ii = 0; ii < unquoted.size(); ++ii) {
478
+ // Note that using 'isalnum' here raises the benchmark time from
479
+ // 32ns to 58ns:
480
+ if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
481
+ (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
482
+ (unquoted[ii] < '0' || unquoted[ii] > '9') &&
483
+ unquoted[ii] != '_' &&
484
+ // If this is the part of a UTF8 or Latin1 character, we need
485
+ // to copy this byte without escaping. Experimentally this is
486
+ // what works correctly with the regexp library.
487
+ !(unquoted[ii] & 128)) {
488
+ if (unquoted[ii] == '\0') { // Special handling for null chars.
489
+ // Can't use "\\0" since the next character might be a digit.
490
+ result += "\\x00";
491
+ continue;
492
+ }
493
+ result += '\\';
494
+ }
495
+ result += unquoted[ii];
496
+ }
497
+
498
+ return result;
499
+ }
500
+
501
+ /***** Actual matching and rewriting code *****/
502
+
503
+ bool PCRE::HitLimit() {
504
+ return hit_limit_ != 0;
505
+ }
506
+
507
+ void PCRE::ClearHitLimit() {
508
+ hit_limit_ = 0;
509
+ }
510
+
511
+ int PCRE::TryMatch(const StringPiece& text,
512
+ size_t startpos,
513
+ Anchor anchor,
514
+ bool empty_ok,
515
+ int *vec,
516
+ int vecsize) const {
517
+ pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
518
+ if (re == NULL) {
519
+ PCREPORT(ERROR) << "Matching against invalid re: " << *error_;
520
+ return 0;
521
+ }
522
+
523
+ int match_limit = match_limit_;
524
+ if (match_limit <= 0) {
525
+ match_limit = GetFlag(FLAGS_regexp_match_limit);
526
+ }
527
+
528
+ int stack_limit = stack_limit_;
529
+ if (stack_limit <= 0) {
530
+ stack_limit = GetFlag(FLAGS_regexp_stack_limit);
531
+ }
532
+
533
+ pcre_extra extra = { 0 };
534
+ if (match_limit > 0) {
535
+ extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
536
+ extra.match_limit = match_limit;
537
+ }
538
+ if (stack_limit > 0) {
539
+ extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
540
+ extra.match_limit_recursion = stack_limit / kPCREFrameSize;
541
+ }
542
+
543
+ int options = 0;
544
+ if (anchor != UNANCHORED)
545
+ options |= PCRE_ANCHORED;
546
+ if (!empty_ok)
547
+ options |= PCRE_NOTEMPTY;
548
+
549
+ int rc = pcre_exec(re, // The regular expression object
550
+ &extra,
551
+ (text.data() == NULL) ? "" : text.data(),
552
+ static_cast<int>(text.size()),
553
+ static_cast<int>(startpos),
554
+ options,
555
+ vec,
556
+ vecsize);
557
+
558
+ // Handle errors
559
+ if (rc == 0) {
560
+ // pcre_exec() returns 0 as a special case when the number of
561
+ // capturing subpatterns exceeds the size of the vector.
562
+ // When this happens, there is a match and the output vector
563
+ // is filled, but we miss out on the positions of the extra subpatterns.
564
+ rc = vecsize / 2;
565
+ } else if (rc < 0) {
566
+ switch (rc) {
567
+ case PCRE_ERROR_NOMATCH:
568
+ return 0;
569
+ case PCRE_ERROR_MATCHLIMIT:
570
+ // Writing to hit_limit is not safe if multiple threads
571
+ // are using the PCRE, but the flag is only intended
572
+ // for use by unit tests anyway, so we let it go.
573
+ hit_limit_ = true;
574
+ PCREPORT(WARNING) << "Exceeded match limit of " << match_limit
575
+ << " when matching '" << pattern_ << "'"
576
+ << " against text that is " << text.size() << " bytes.";
577
+ return 0;
578
+ case PCRE_ERROR_RECURSIONLIMIT:
579
+ // See comment about hit_limit above.
580
+ hit_limit_ = true;
581
+ PCREPORT(WARNING) << "Exceeded stack limit of " << stack_limit
582
+ << " when matching '" << pattern_ << "'"
583
+ << " against text that is " << text.size() << " bytes.";
584
+ return 0;
585
+ default:
586
+ // There are other return codes from pcre.h :
587
+ // PCRE_ERROR_NULL (-2)
588
+ // PCRE_ERROR_BADOPTION (-3)
589
+ // PCRE_ERROR_BADMAGIC (-4)
590
+ // PCRE_ERROR_UNKNOWN_NODE (-5)
591
+ // PCRE_ERROR_NOMEMORY (-6)
592
+ // PCRE_ERROR_NOSUBSTRING (-7)
593
+ // ...
594
+ PCREPORT(ERROR) << "Unexpected return code: " << rc
595
+ << " when matching '" << pattern_ << "'"
596
+ << ", re=" << re
597
+ << ", text=" << text
598
+ << ", vec=" << vec
599
+ << ", vecsize=" << vecsize;
600
+ return 0;
601
+ }
602
+ }
603
+
604
+ return rc;
605
+ }
606
+
607
+ bool PCRE::DoMatchImpl(const StringPiece& text,
608
+ Anchor anchor,
609
+ size_t* consumed,
610
+ const Arg* const* args,
611
+ int n,
612
+ int* vec,
613
+ int vecsize) const {
614
+ assert((1 + n) * 3 <= vecsize); // results + PCRE workspace
615
+ if (NumberOfCapturingGroups() < n) {
616
+ // RE has fewer capturing groups than number of Arg pointers passed in.
617
+ return false;
618
+ }
619
+
620
+ int matches = TryMatch(text, 0, anchor, true, vec, vecsize);
621
+ assert(matches >= 0); // TryMatch never returns negatives
622
+ if (matches == 0)
623
+ return false;
624
+
625
+ *consumed = vec[1];
626
+
627
+ if (n == 0 || args == NULL) {
628
+ // We are not interested in results
629
+ return true;
630
+ }
631
+
632
+ // If we got here, we must have matched the whole pattern.
633
+ // We do not need (can not do) any more checks on the value of 'matches' here
634
+ // -- see the comment for TryMatch.
635
+ for (int i = 0; i < n; i++) {
636
+ const int start = vec[2*(i+1)];
637
+ const int limit = vec[2*(i+1)+1];
638
+
639
+ // Avoid invoking undefined behavior when text.data() happens
640
+ // to be null and start happens to be -1, the latter being the
641
+ // case for an unmatched subexpression. Even if text.data() is
642
+ // not null, pointing one byte before was a longstanding bug.
643
+ const char* addr = NULL;
644
+ if (start != -1) {
645
+ addr = text.data() + start;
646
+ }
647
+
648
+ if (!args[i]->Parse(addr, limit-start)) {
649
+ // TODO: Should we indicate what the error was?
650
+ return false;
651
+ }
652
+ }
653
+
654
+ return true;
655
+ }
656
+
657
+ bool PCRE::DoMatch(const StringPiece& text,
658
+ Anchor anchor,
659
+ size_t* consumed,
660
+ const Arg* const args[],
661
+ int n) const {
662
+ assert(n >= 0);
663
+ const int vecsize = (1 + n) * 3; // results + PCRE workspace
664
+ // (as for kVecSize)
665
+ int* vec = new int[vecsize];
666
+ bool b = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);
667
+ delete[] vec;
668
+ return b;
669
+ }
670
+
671
+ bool PCRE::Rewrite(std::string *out, const StringPiece &rewrite,
672
+ const StringPiece &text, int *vec, int veclen) const {
673
+ int number_of_capturing_groups = NumberOfCapturingGroups();
674
+ for (const char *s = rewrite.data(), *end = s + rewrite.size();
675
+ s < end; s++) {
676
+ int c = *s;
677
+ if (c == '\\') {
678
+ c = *++s;
679
+ if (isdigit(c)) {
680
+ int n = (c - '0');
681
+ if (n >= veclen) {
682
+ if (n <= number_of_capturing_groups) {
683
+ // unmatched optional capturing group. treat
684
+ // its value as empty string; i.e., nothing to append.
685
+ } else {
686
+ PCREPORT(ERROR) << "requested group " << n
687
+ << " in regexp " << rewrite.data();
688
+ return false;
689
+ }
690
+ }
691
+ int start = vec[2 * n];
692
+ if (start >= 0)
693
+ out->append(text.data() + start, vec[2 * n + 1] - start);
694
+ } else if (c == '\\') {
695
+ out->push_back('\\');
696
+ } else {
697
+ PCREPORT(ERROR) << "invalid rewrite pattern: " << rewrite.data();
698
+ return false;
699
+ }
700
+ } else {
701
+ out->push_back(c);
702
+ }
703
+ }
704
+ return true;
705
+ }
706
+
707
+ bool PCRE::CheckRewriteString(const StringPiece& rewrite,
708
+ std::string* error) const {
709
+ int max_token = -1;
710
+ for (const char *s = rewrite.data(), *end = s + rewrite.size();
711
+ s < end; s++) {
712
+ int c = *s;
713
+ if (c != '\\') {
714
+ continue;
715
+ }
716
+ if (++s == end) {
717
+ *error = "Rewrite schema error: '\\' not allowed at end.";
718
+ return false;
719
+ }
720
+ c = *s;
721
+ if (c == '\\') {
722
+ continue;
723
+ }
724
+ if (!isdigit(c)) {
725
+ *error = "Rewrite schema error: "
726
+ "'\\' must be followed by a digit or '\\'.";
727
+ return false;
728
+ }
729
+ int n = (c - '0');
730
+ if (max_token < n) {
731
+ max_token = n;
732
+ }
733
+ }
734
+
735
+ if (max_token > NumberOfCapturingGroups()) {
736
+ *error = StringPrintf(
737
+ "Rewrite schema requests %d matches, but the regexp only has %d "
738
+ "parenthesized subexpressions.",
739
+ max_token, NumberOfCapturingGroups());
740
+ return false;
741
+ }
742
+ return true;
743
+ }
744
+
745
+
746
+ // Return the number of capturing subpatterns, or -1 if the
747
+ // regexp wasn't valid on construction.
748
+ int PCRE::NumberOfCapturingGroups() const {
749
+ if (re_partial_ == NULL) return -1;
750
+
751
+ int result;
752
+ int rc = pcre_fullinfo(re_partial_, // The regular expression object
753
+ NULL, // We did not study the pattern
754
+ PCRE_INFO_CAPTURECOUNT,
755
+ &result);
756
+ if (rc != 0) {
757
+ PCREPORT(ERROR) << "Unexpected return code: " << rc;
758
+ return -1;
759
+ }
760
+ return result;
761
+ }
762
+
763
+
764
+ /***** Parsers for various types *****/
765
+
766
+ bool PCRE::Arg::parse_null(const char* str, size_t n, void* dest) {
767
+ // We fail if somebody asked us to store into a non-NULL void* pointer
768
+ return (dest == NULL);
769
+ }
770
+
771
+ bool PCRE::Arg::parse_string(const char* str, size_t n, void* dest) {
772
+ if (dest == NULL) return true;
773
+ reinterpret_cast<std::string*>(dest)->assign(str, n);
774
+ return true;
775
+ }
776
+
777
+ bool PCRE::Arg::parse_stringpiece(const char* str, size_t n, void* dest) {
778
+ if (dest == NULL) return true;
779
+ *(reinterpret_cast<StringPiece*>(dest)) = StringPiece(str, n);
780
+ return true;
781
+ }
782
+
783
+ bool PCRE::Arg::parse_char(const char* str, size_t n, void* dest) {
784
+ if (n != 1) return false;
785
+ if (dest == NULL) return true;
786
+ *(reinterpret_cast<char*>(dest)) = str[0];
787
+ return true;
788
+ }
789
+
790
+ bool PCRE::Arg::parse_schar(const char* str, size_t n, void* dest) {
791
+ if (n != 1) return false;
792
+ if (dest == NULL) return true;
793
+ *(reinterpret_cast<signed char*>(dest)) = str[0];
794
+ return true;
795
+ }
796
+
797
+ bool PCRE::Arg::parse_uchar(const char* str, size_t n, void* dest) {
798
+ if (n != 1) return false;
799
+ if (dest == NULL) return true;
800
+ *(reinterpret_cast<unsigned char*>(dest)) = str[0];
801
+ return true;
802
+ }
803
+
804
// Largest number spec that we are willing to parse
static const int kMaxNumberLength = 32;

// Prepares the "n" bytes at "str" for a strtoxxx() call, which needs its
// input to stop being number-like right after the text of interest.
//
// REQUIRES "buf" must have length at least kMaxNumberLength+1
// REQUIRES "n > 0"
// REQUIRES "str[n]" must be readable: the byte right after the input text
//          is inspected to decide whether a terminated copy is needed.
//
// Copies "str" into "buf" and null-terminates if necessary.
// Returns one of:
//      a. "str" if no termination is needed
//      b. "buf" if the string was copied and null-terminated
//      c. "" if the input was invalid and has no hope of being parsed
static const char* TerminateNumber(char* buf, const char* str, size_t n) {
  // The <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF), so convert before calling them: a plain char
  // holding a byte >= 0x80 may be negative.
  if ((n > 0) && isspace(static_cast<unsigned char>(*str))) {
    // We are less forgiving than the strtoxxx() routines and do not
    // allow leading spaces.
    return "";
  }

  // See if the character right after the input text may potentially
  // look like a digit (decimal or hexadecimal). If it does, strtoxxx()
  // would keep consuming it, so the text must be copied and terminated.
  const unsigned char next = static_cast<unsigned char>(str[n]);
  const bool next_is_digit_like = isdigit(next) ||
                                  (next >= 'a' && next <= 'f') ||
                                  (next >= 'A' && next <= 'F');
  if (!next_is_digit_like) {
    // We can parse right out of the supplied string, so return it.
    return str;
  }

  if (n > static_cast<size_t>(kMaxNumberLength)) {
    return "";  // Input too big to be a valid number
  }
  memcpy(buf, str, n);
  buf[n] = '\0';
  return buf;
}
835
+
836
+ bool PCRE::Arg::parse_long_radix(const char* str,
837
+ size_t n,
838
+ void* dest,
839
+ int radix) {
840
+ if (n == 0) return false;
841
+ char buf[kMaxNumberLength+1];
842
+ str = TerminateNumber(buf, str, n);
843
+ char* end;
844
+ errno = 0;
845
+ long r = strtol(str, &end, radix);
846
+ if (end != str + n) return false; // Leftover junk
847
+ if (errno) return false;
848
+ if (dest == NULL) return true;
849
+ *(reinterpret_cast<long*>(dest)) = r;
850
+ return true;
851
+ }
852
+
853
+ bool PCRE::Arg::parse_ulong_radix(const char* str,
854
+ size_t n,
855
+ void* dest,
856
+ int radix) {
857
+ if (n == 0) return false;
858
+ char buf[kMaxNumberLength+1];
859
+ str = TerminateNumber(buf, str, n);
860
+ if (str[0] == '-') {
861
+ // strtoul() will silently accept negative numbers and parse
862
+ // them. This module is more strict and treats them as errors.
863
+ return false;
864
+ }
865
+
866
+ char* end;
867
+ errno = 0;
868
+ unsigned long r = strtoul(str, &end, radix);
869
+ if (end != str + n) return false; // Leftover junk
870
+ if (errno) return false;
871
+ if (dest == NULL) return true;
872
+ *(reinterpret_cast<unsigned long*>(dest)) = r;
873
+ return true;
874
+ }
875
+
876
+ bool PCRE::Arg::parse_short_radix(const char* str,
877
+ size_t n,
878
+ void* dest,
879
+ int radix) {
880
+ long r;
881
+ if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
882
+ if ((short)r != r) return false; // Out of range
883
+ if (dest == NULL) return true;
884
+ *(reinterpret_cast<short*>(dest)) = (short)r;
885
+ return true;
886
+ }
887
+
888
+ bool PCRE::Arg::parse_ushort_radix(const char* str,
889
+ size_t n,
890
+ void* dest,
891
+ int radix) {
892
+ unsigned long r;
893
+ if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
894
+ if ((unsigned short)r != r) return false; // Out of range
895
+ if (dest == NULL) return true;
896
+ *(reinterpret_cast<unsigned short*>(dest)) = (unsigned short)r;
897
+ return true;
898
+ }
899
+
900
+ bool PCRE::Arg::parse_int_radix(const char* str,
901
+ size_t n,
902
+ void* dest,
903
+ int radix) {
904
+ long r;
905
+ if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
906
+ if ((int)r != r) return false; // Out of range
907
+ if (dest == NULL) return true;
908
+ *(reinterpret_cast<int*>(dest)) = (int)r;
909
+ return true;
910
+ }
911
+
912
+ bool PCRE::Arg::parse_uint_radix(const char* str,
913
+ size_t n,
914
+ void* dest,
915
+ int radix) {
916
+ unsigned long r;
917
+ if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
918
+ if ((unsigned int)r != r) return false; // Out of range
919
+ if (dest == NULL) return true;
920
+ *(reinterpret_cast<unsigned int*>(dest)) = (unsigned int)r;
921
+ return true;
922
+ }
923
+
924
+ bool PCRE::Arg::parse_longlong_radix(const char* str,
925
+ size_t n,
926
+ void* dest,
927
+ int radix) {
928
+ if (n == 0) return false;
929
+ char buf[kMaxNumberLength+1];
930
+ str = TerminateNumber(buf, str, n);
931
+ char* end;
932
+ errno = 0;
933
+ long long r = strtoll(str, &end, radix);
934
+ if (end != str + n) return false; // Leftover junk
935
+ if (errno) return false;
936
+ if (dest == NULL) return true;
937
+ *(reinterpret_cast<long long*>(dest)) = r;
938
+ return true;
939
+ }
940
+
941
+ bool PCRE::Arg::parse_ulonglong_radix(const char* str,
942
+ size_t n,
943
+ void* dest,
944
+ int radix) {
945
+ if (n == 0) return false;
946
+ char buf[kMaxNumberLength+1];
947
+ str = TerminateNumber(buf, str, n);
948
+ if (str[0] == '-') {
949
+ // strtoull() will silently accept negative numbers and parse
950
+ // them. This module is more strict and treats them as errors.
951
+ return false;
952
+ }
953
+ char* end;
954
+ errno = 0;
955
+ unsigned long long r = strtoull(str, &end, radix);
956
+ if (end != str + n) return false; // Leftover junk
957
+ if (errno) return false;
958
+ if (dest == NULL) return true;
959
+ *(reinterpret_cast<unsigned long long*>(dest)) = r;
960
+ return true;
961
+ }
962
+
963
// Shared implementation of the float and double parsers. Copies the "n"
// bytes at "str" into a NUL-terminated scratch buffer, converts it with
// strtof() (when "isfloat") or strtod(), and stores the value in the
// float/double that "dest" points to (nothing is stored when "dest" is
// NULL). Fails on empty input, on input of 200 bytes or more, when the
// conversion does not consume exactly "n" bytes, or when it sets errno.
static bool parse_double_float(const char* str, size_t n, bool isfloat,
                               void* dest) {
  static const int kMaxLength = 200;
  if (n == 0) return false;
  if (n >= kMaxLength) return false;  // must leave room for the terminator

  char scratch[kMaxLength];
  memcpy(scratch, str, n);
  scratch[n] = '\0';

  char* stop;
  errno = 0;
  const double value =
      isfloat ? strtof(scratch, &stop) : strtod(scratch, &stop);

  const bool consumed_all = (stop == scratch + n);  // else: leftover junk
  if (!consumed_all || errno) return false;

  if (dest != NULL) {
    if (isfloat) {
      *(reinterpret_cast<float*>(dest)) = static_cast<float>(value);
    } else {
      *(reinterpret_cast<double*>(dest)) = value;
    }
  }
  return true;
}
989
+
990
+ bool PCRE::Arg::parse_double(const char* str, size_t n, void* dest) {
991
+ return parse_double_float(str, n, false, dest);
992
+ }
993
+
994
+ bool PCRE::Arg::parse_float(const char* str, size_t n, void* dest) {
995
+ return parse_double_float(str, n, true, dest);
996
+ }
997
+
998
+ #define DEFINE_INTEGER_PARSER(name) \
999
+ bool PCRE::Arg::parse_##name(const char* str, size_t n, void* dest) { \
1000
+ return parse_##name##_radix(str, n, dest, 10); \
1001
+ } \
1002
+ bool PCRE::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) { \
1003
+ return parse_##name##_radix(str, n, dest, 16); \
1004
+ } \
1005
+ bool PCRE::Arg::parse_##name##_octal(const char* str, size_t n, \
1006
+ void* dest) { \
1007
+ return parse_##name##_radix(str, n, dest, 8); \
1008
+ } \
1009
+ bool PCRE::Arg::parse_##name##_cradix(const char* str, size_t n, \
1010
+ void* dest) { \
1011
+ return parse_##name##_radix(str, n, dest, 0); \
1012
+ }
1013
+
1014
+ DEFINE_INTEGER_PARSER(short);
1015
+ DEFINE_INTEGER_PARSER(ushort);
1016
+ DEFINE_INTEGER_PARSER(int);
1017
+ DEFINE_INTEGER_PARSER(uint);
1018
+ DEFINE_INTEGER_PARSER(long);
1019
+ DEFINE_INTEGER_PARSER(ulong);
1020
+ DEFINE_INTEGER_PARSER(longlong);
1021
+ DEFINE_INTEGER_PARSER(ulonglong);
1022
+
1023
+ #undef DEFINE_INTEGER_PARSER
1024
+
1025
+ } // namespace re2