RubyGems - grpc - Versions diffs - 1.30.0 → 1.31.1 - Mend

grpc 1.30.0 → 1.31.1

Potentially problematic release.

This version of grpc might be problematic. Click here for more details.

Files changed (383) hide show

@@ -0,0 +1,436 @@
+// Copyright 2007 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#ifndef RE2_PROG_H_
+#define RE2_PROG_H_
+// Compiled representation of regular expressions.
+// See regexp.h for the Regexp class, which represents a regular
+// expression symbolically.
+#include <stdint.h>
+#include <functional>
+#include <mutex>
+#include <string>
+#include <vector>
+#include <type_traits>
+#include "util/util.h"
+#include "util/logging.h"
+#include "re2/pod_array.h"
+#include "re2/re2.h"
+#include "re2/sparse_array.h"
+#include "re2/sparse_set.h"
+namespace re2 {
+// Opcodes for Inst. Inst::opcode() reads the opcode from the low 3 bits of out_opcode_, so opcodes must fit in 3 bits.
+enum InstOp {
+  kInstAlt = 0,      // choose between out_ and out1_
+  kInstAltMatch,     // Alt: out_ is [00-FF] and back, out1_ is match; or vice versa.
+  kInstByteRange,    // next (possible case-folded) byte must be in [lo_, hi_]
+  kInstCapture,      // capturing parenthesis number cap_
+  kInstEmptyWidth,   // empty-width special (^ $ ...); bit(s) set in empty_
+  kInstMatch,        // found a match!
+  kInstNop,          // no-op; occasionally unavoidable
+  kInstFail,         // never match; occasionally unavoidable
+  kNumInst,          // number of opcodes above; used to size Prog::inst_count_
+};
+// Bit flags for empty-width specials; Inst::empty_ holds a bitwise OR of these.
+enum EmptyOp {
+  kEmptyBeginLine        = 1<<0,      // ^ - beginning of line
+  kEmptyEndLine          = 1<<1,      // $ - end of line
+  kEmptyBeginText        = 1<<2,      // \A - beginning of text
+  kEmptyEndText          = 1<<3,      // \z - end of text
+  kEmptyWordBoundary     = 1<<4,      // \b - word boundary
+  kEmptyNonWordBoundary  = 1<<5,      // \B - not \b
+  kEmptyAllFlags         = (1<<6)-1,  // mask with all six flags above set
+};
+class DFA;
+class Regexp;
+// Compiled form of regexp program: the instruction array plus entry points, byte map and cached DFAs. Not copyable.
+class Prog {
+ public:
+  Prog();
+  ~Prog();
+  // Single instruction in regexp program.
+  class Inst {
+   public:
+    // Defaulted so that Inst stays trivial; see the static_assert that follows this class.
+    Inst() = default;
+    // Copyable.
+    Inst(const Inst&) = default;
+    Inst& operator=(const Inst&) = default;
+    // Constructors per opcode
+    void InitAlt(uint32_t out, uint32_t out1);
+    void InitByteRange(int lo, int hi, int foldcase, uint32_t out);
+    void InitCapture(int cap, uint32_t out);
+    void InitEmptyWidth(EmptyOp empty, uint32_t out);
+    void InitMatch(int id);
+    void InitNop(uint32_t out);
+    void InitFail();
+    // Getters. The DCHECKs verify that the union member being read belongs to the current opcode.
+    int id(Prog* p) { return static_cast<int>(this - p->inst_.data()); }  // index of this Inst within p's array
+    InstOp opcode() { return static_cast<InstOp>(out_opcode_&7); }
+    int last()      { return (out_opcode_>>3)&1; }
+    int out()       { return out_opcode_>>4; }
+    int out1()      { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; }
+    int cap()       { DCHECK_EQ(opcode(), kInstCapture); return cap_; }
+    int lo()        { DCHECK_EQ(opcode(), kInstByteRange); return lo_; }
+    int hi()        { DCHECK_EQ(opcode(), kInstByteRange); return hi_; }
+    int foldcase()  { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_&1; }
+    int hint()      { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_>>1; }
+    int match_id()  { DCHECK_EQ(opcode(), kInstMatch); return match_id_; }
+    EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; }
+    bool greedy(Prog* p) {  // true when out() leads to a ByteRange, directly or through a single Nop
+      DCHECK_EQ(opcode(), kInstAltMatch);
+      return p->inst(out())->opcode() == kInstByteRange ||
+             (p->inst(out())->opcode() == kInstNop &&
+              p->inst(p->inst(out())->out())->opcode() == kInstByteRange);
+    }
+    // Does this inst (a kInstByteRange) match c? With foldcase set, A-Z is mapped to a-z before the range check.
+    inline bool Matches(int c) {
+      DCHECK_EQ(opcode(), kInstByteRange);
+      if (foldcase() && 'A' <= c && c <= 'Z')
+        c += 'a' - 'A';
+      return lo_ <= c && c <= hi_;
+    }
+    // Returns string representation for debugging.
+    std::string Dump();
+    // Maximum instruction id.
+    // (Must fit in out_opcode_. PatchList/last steal another bit.)
+    static const int kMaxInst = (1<<28) - 1;
+   private:
+    void set_opcode(InstOp opcode) {  // replaces the opcode bits, keeping out and last
+      out_opcode_ = (out()<<4) | (last()<<3) | opcode;
+    }
+    void set_last() {  // sets the last bit, keeping out and opcode
+      out_opcode_ = (out()<<4) | (1<<3) | opcode();
+    }
+    void set_out(int out) {  // replaces out, keeping last and opcode
+      out_opcode_ = (out<<4) | (last()<<3) | opcode();
+    }
+    void set_out_opcode(int out, InstOp opcode) {  // replaces out and opcode, keeping last
+      out_opcode_ = (out<<4) | (last()<<3) | opcode;
+    }
+    uint32_t out_opcode_;  // 28 bits: out, 1 bit: last, 3 (low) bits: opcode
+    union {                // additional instruction arguments:
+      uint32_t out1_;      // opcode == kInstAlt
+                           //   alternate next instruction
+      int32_t cap_;        // opcode == kInstCapture
+                           //   Index of capture register (holds text
+                           //   position recorded by capturing parentheses).
+                           //   For \n (the submatch for the nth parentheses),
+                           //   the left parenthesis captures into register 2*n
+                           //   and the right one captures into register 2*n+1.
+      int32_t match_id_;   // opcode == kInstMatch
+                           //   Match ID to identify this match (for re2::Set).
+      struct {             // opcode == kInstByteRange
+        uint8_t lo_;       //   byte range is lo_-hi_ inclusive
+        uint8_t hi_;       //
+        uint16_t hint_foldcase_;  // 15 bits: hint, 1 (low) bit: foldcase
+                           //   hint to execution engines: the delta to the
+                           //   next instruction (in the current list) worth
+                           //   exploring iff this instruction matched; 0
+                           //   means there are no remaining possibilities,
+                           //   which is most likely for character classes.
+                           //   foldcase: A-Z -> a-z before checking range.
+      };
+      EmptyOp empty_;       // opcode == kInstEmptyWidth
+                            //   empty_ is bitwise OR of kEmpty* flags above.
+    };
+    friend class Compiler;
+    friend struct PatchList;
+    friend class Prog;
+  };
+  // Inst must be trivial so that we can freely clear it with memset(3).
+  // Arrays of Inst are initialised by copying the initial elements with
+  // memmove(3) and then clearing any remaining elements with memset(3).
+  static_assert(std::is_trivial<Inst>::value, "Inst must be trivial");
+  // Whether to anchor the search.
+  enum Anchor {
+    kUnanchored,  // match anywhere
+    kAnchored,    // match only starting at beginning of text
+  };
+  // Kind of match to look for (for anchor != kFullMatch)
+  //
+  // kLongestMatch mode finds the overall longest
+  // match but still makes its submatch choices the way
+  // Perl would, not in the way prescribed by POSIX.
+  // The POSIX rules are much more expensive to implement,
+  // and no one has needed them.
+  //
+  // kFullMatch is not strictly necessary -- we could use
+  // kLongestMatch and then check the length of the match -- but
+  // the matching code can run faster if it knows to consider only
+  // full matches.
+  enum MatchKind {
+    kFirstMatch,     // like Perl, PCRE
+    kLongestMatch,   // like egrep or POSIX
+    kFullMatch,      // match only entire text; implies anchor==kAnchored
+    kManyMatch       // for SearchDFA, records set of matches
+  };
+  Inst *inst(int id) { return &inst_[id]; }
+  int start() { return start_; }
+  void set_start(int start) { start_ = start; }
+  int start_unanchored() { return start_unanchored_; }
+  void set_start_unanchored(int start) { start_unanchored_ = start; }
+  int size() { return size_; }
+  bool reversed() { return reversed_; }
+  void set_reversed(bool reversed) { reversed_ = reversed; }
+  int list_count() { return list_count_; }
+  int inst_count(InstOp op) { return inst_count_[op]; }
+  uint16_t* list_heads() { return list_heads_.data(); }
+  int64_t dfa_mem() { return dfa_mem_; }
+  void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
+  bool anchor_start() { return anchor_start_; }
+  void set_anchor_start(bool b) { anchor_start_ = b; }
+  bool anchor_end() { return anchor_end_; }
+  void set_anchor_end(bool b) { anchor_end_ = b; }
+  int bytemap_range() { return bytemap_range_; }
+  const uint8_t* bytemap() { return bytemap_; }
+  bool can_prefix_accel() { return prefix_size_ != 0; }
+  // Accelerates to the first likely occurrence of the prefix.
+  // Returns a pointer to the first byte or NULL if not found. A one-byte prefix uses memchr(3) directly.
+  const void* PrefixAccel(const void* data, size_t size) {
+    DCHECK_GE(prefix_size_, 1);
+    return prefix_size_ == 1 ? memchr(data, prefix_front_, size)
+                             : PrefixAccel_FrontAndBack(data, size);
+  }
+  // An implementation of prefix accel that looks for prefix_front_ and
+  // prefix_back_ to return fewer false positives than memchr(3) alone.
+  const void* PrefixAccel_FrontAndBack(const void* data, size_t size);
+  // Returns string representation of program for debugging.
+  std::string Dump();
+  std::string DumpUnanchored();
+  std::string DumpByteMap();
+  // Returns the set of kEmpty flags that are in effect at
+  // position p within context.
+  static uint32_t EmptyFlags(const StringPiece& context, const char* p);
+  // Returns whether byte c is a word character: ASCII only.
+  // Used by the implementation of \b and \B.
+  // This is not right for Unicode, but:
+  //   - it's hard to get right in a byte-at-a-time matching world
+  //     (the DFA has only one-byte lookahead).
+  //   - even if the lookahead were possible, the Progs would be huge.
+  // This crude approximation is the same one PCRE uses.
+  static bool IsWordChar(uint8_t c) {
+    return ('A' <= c && c <= 'Z') ||
+           ('a' <= c && c <= 'z') ||
+           ('0' <= c && c <= '9') ||
+           c == '_';
+  }
+  // Execution engines.  They all search for the regexp (run the prog)
+  // in text, which is in the larger context (used for ^ $ \b etc).
+  // Anchor and kind control the kind of search.
+  // Returns true if match found, false if not.
+  // If match found, fills match[0..nmatch-1] with submatch info.
+  // match[0] is overall match, match[1] is first set of parens, etc.
+  // If a particular submatch is not matched during the regexp match,
+  // it is set to NULL.
+  //
+  // Matching text == StringPiece(NULL, 0) is treated as any other empty
+  // string, but note that on return, it will not be possible to distinguish
+  // submatches that matched that empty string from submatches that didn't
+  // match anything.  Either way, match[i] == NULL.
+  // Search using NFA: can find submatches but kind of slow.
+  bool SearchNFA(const StringPiece& text, const StringPiece& context,
+                 Anchor anchor, MatchKind kind,
+                 StringPiece* match, int nmatch);
+  // Search using DFA: much faster than NFA but only finds
+  // end of match and can use a lot more memory.
+  // Returns whether a match was found.
+  // If the DFA runs out of memory, sets *failed to true and returns false.
+  // If matches != NULL and kind == kManyMatch and there is a match,
+  // SearchDFA fills matches with the match IDs of the final matching state.
+  bool SearchDFA(const StringPiece& text, const StringPiece& context,
+                 Anchor anchor, MatchKind kind, StringPiece* match0,
+                 bool* failed, SparseSet* matches);
+  // The callback issued after building each DFA state with BuildEntireDFA().
+  // If next is null, then the memory budget has been exhausted and building
+  // will halt. Otherwise, the state has been built and next points to an array
+  // of bytemap_range()+1 slots holding the next states as per the bytemap and
+  // kByteEndText. The number of the state is implied by the callback sequence:
+  // the first callback is for state 0, the second callback is for state 1, ...
+  // match indicates whether the state is a matching state.
+  using DFAStateCallback = std::function<void(const int* next, bool match)>;
+  // Build the entire DFA for the given match kind.
+  // Usually the DFA is built out incrementally, as needed, which
+  // avoids lots of unnecessary work.
+  // If cb is not empty, it receives one callback per state built.
+  // Returns the number of states built.
+  // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY.
+  int BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb);
+  // Controls whether the DFA should bail out early if the NFA would be faster.
+  // FOR TESTING ONLY.
+  static void TEST_dfa_should_bail_when_slow(bool b);
+  // Compute bytemap.
+  void ComputeByteMap();
+  // Run peep-hole optimizer on program.
+  void Optimize();
+  // One-pass NFA: only correct if IsOnePass() is true,
+  // but much faster than NFA (competitive with PCRE)
+  // for those expressions.
+  bool IsOnePass();
+  bool SearchOnePass(const StringPiece& text, const StringPiece& context,
+                     Anchor anchor, MatchKind kind,
+                     StringPiece* match, int nmatch);
+  // Bit-state backtracking.  Fast on small cases but uses memory
+  // proportional to the product of the list count and the text size.
+  bool CanBitState() { return list_heads_.data() != NULL; }  // list_heads_ is not populated if size_ is overly large
+  bool SearchBitState(const StringPiece& text, const StringPiece& context,
+                      Anchor anchor, MatchKind kind,
+                      StringPiece* match, int nmatch);
+  static const int kMaxOnePassCapture = 5;  // $0 through $4
+  // Backtracking search: the gold standard against which the other
+  // implementations are checked.  FOR TESTING ONLY.
+  // It allocates a ton of memory to avoid running forever.
+  // It is also recursive, so can't use in production (will overflow stacks).
+  // The name "Unsafe" here is supposed to be a flag that
+  // you should not be using this function.
+  bool UnsafeSearchBacktrack(const StringPiece& text,
+                             const StringPiece& context,
+                             Anchor anchor, MatchKind kind,
+                             StringPiece* match, int nmatch);
+  // Computes range for any strings matching regexp. The min and max can in
+  // some cases be arbitrarily precise, so the caller gets to specify the
+  // maximum desired length of string returned.
+  //
+  // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any
+  // string s that is an anchored match for this regexp satisfies
+  //   min <= s && s <= max.
+  //
+  // Note that PossibleMatchRange() will only consider the first copy of an
+  // infinitely repeated element (i.e., any regexp element followed by a '*' or
+  // '+' operator). Regexps with "{N}" constructions are not affected, as those
+  // do not compile down to infinite repetitions.
+  //
+  // Returns true on success, false on error.
+  bool PossibleMatchRange(std::string* min, std::string* max, int maxlen);
+  // EXPERIMENTAL! SUBJECT TO CHANGE!
+  // Outputs the program fanout into the given sparse array.
+  void Fanout(SparseArray<int>* fanout);
+  // Compiles a collection of regexps to Prog.  Each regexp will have
+  // its own Match instruction recording the index in the output vector.
+  static Prog* CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem);
+  // Flattens the Prog from "tree" form to "list" form. This is an in-place
+  // operation in the sense that the old instructions are lost.
+  void Flatten();
+  // Walks the Prog; the "successor roots" or predecessors of the reachable
+  // instructions are marked in rootmap or predmap/predvec, respectively.
+  // reachable and stk are preallocated scratch structures.
+  void MarkSuccessors(SparseArray<int>* rootmap,
+                      SparseArray<int>* predmap,
+                      std::vector<std::vector<int>>* predvec,
+                      SparseSet* reachable, std::vector<int>* stk);
+  // Walks the Prog from the given "root" instruction; the "dominator root"
+  // of the reachable instructions (if such exists) is marked in rootmap.
+  // reachable and stk are preallocated scratch structures.
+  void MarkDominator(int root, SparseArray<int>* rootmap,
+                     SparseArray<int>* predmap,
+                     std::vector<std::vector<int>>* predvec,
+                     SparseSet* reachable, std::vector<int>* stk);
+  // Walks the Prog from the given "root" instruction; the reachable
+  // instructions are emitted in "list" form and appended to flat.
+  // reachable and stk are preallocated scratch structures.
+  void EmitList(int root, SparseArray<int>* rootmap,
+                std::vector<Inst>* flat,
+                SparseSet* reachable, std::vector<int>* stk);
+  // Computes hints for ByteRange instructions in [begin, end).
+  void ComputeHints(std::vector<Inst>* flat, int begin, int end);
+ private:
+  friend class Compiler;
+  DFA* GetDFA(MatchKind kind);
+  void DeleteDFA(DFA* dfa);
+  bool anchor_start_;       // regexp has explicit start anchor
+  bool anchor_end_;         // regexp has explicit end anchor
+  bool reversed_;           // whether program runs backward over input
+  bool did_flatten_;        // has Flatten been called?
+  bool did_onepass_;        // has IsOnePass been called?
+  int start_;               // entry point for program
+  int start_unanchored_;    // unanchored entry point for program
+  int size_;                // number of instructions
+  int bytemap_range_;       // bytemap_[x] < bytemap_range_
+  size_t prefix_size_;      // size of prefix (0 if no prefix)
+  int prefix_front_;        // first byte of prefix (-1 if no prefix)
+  int prefix_back_;         // last byte of prefix (-1 if no prefix)
+  int list_count_;                 // count of lists (see above)
+  int inst_count_[kNumInst];       // count of instructions by opcode
+  PODArray<uint16_t> list_heads_;  // sparse array enumerating list heads
+                                   // not populated if size_ is overly large
+  PODArray<Inst> inst_;              // pointer to instruction array
+  PODArray<uint8_t> onepass_nodes_;  // data for OnePass nodes
+  int64_t dfa_mem_;         // Maximum memory for DFAs.
+  DFA* dfa_first_;          // DFA cached for kFirstMatch/kManyMatch
+  DFA* dfa_longest_;        // DFA cached for kLongestMatch/kFullMatch
+  uint8_t bytemap_[256];    // map from input bytes to byte classes
+  std::once_flag dfa_first_once_;
+  std::once_flag dfa_longest_once_;
+  Prog(const Prog&) = delete;  // not copyable
+  Prog& operator=(const Prog&) = delete;
+};
+}  // namespace re2
+#endif  // RE2_PROG_H_

data/third_party/re2/re2/re2.cc ADDED

@@ -0,0 +1,1362 @@
+// Copyright 2003-2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Regular expression interface RE2.
+//
+// Originally the PCRE C++ wrapper, but adapted to use
+// the new automata-based regular expression engines.
+#include "re2/re2.h"
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+#include <atomic>
+#include <iterator>
+#include <mutex>
+#include <string>
+#include <utility>
+#include <vector>
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "util/utf.h"
+#include "re2/prog.h"
+#include "re2/regexp.h"
+#include "re2/sparse_array.h"
+namespace re2 {
+// Maximum number of args we can set; kVecSize adds one more slot because vec[0] holds the overall match.
+static const int kMaxArgs = 16;
+static const int kVecSize = 1+kMaxArgs;
+const int RE2::Options::kDefaultMaxMem;  // initialized in re2.h
+RE2::Options::Options(RE2::CannedOptions opt)  // builds the option set that corresponds to a canned preset
+  : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),  // only the Latin1 preset leaves UTF-8
+    posix_syntax_(opt == RE2::POSIX),   // POSIX preset only
+    longest_match_(opt == RE2::POSIX),  // POSIX preset only
+    log_errors_(opt != RE2::Quiet),     // every preset logs errors except Quiet
+    max_mem_(kDefaultMaxMem),
+    literal_(false),
+    never_nl_(false),
+    dot_nl_(false),
+    never_capture_(false),
+    case_sensitive_(true),
+    perl_classes_(false),
+    word_boundary_(false),
+    one_line_(false) {
+}
+// Static empty objects for use as const references; RE2::~RE2() takes care never to delete them.
+// To avoid global constructors, allocated in RE2::Init() (once, under std::call_once).
+static const std::string* empty_string;
+static const std::map<std::string, int>* empty_named_groups;
+static const std::map<int, std::string>* empty_group_names;
+// Converts from Regexp error code to RE2 error code.
+// Maybe some day they will diverge.  In any event, this
+// hides the existence of Regexp from RE2 users.
+static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
+  switch (code) {
+    case re2::kRegexpSuccess:
+      return RE2::NoError;
+    case re2::kRegexpInternalError:
+      return RE2::ErrorInternal;
+    case re2::kRegexpBadEscape:
+      return RE2::ErrorBadEscape;
+    case re2::kRegexpBadCharClass:
+      return RE2::ErrorBadCharClass;
+    case re2::kRegexpBadCharRange:
+      return RE2::ErrorBadCharRange;
+    case re2::kRegexpMissingBracket:
+      return RE2::ErrorMissingBracket;
+    case re2::kRegexpMissingParen:
+      return RE2::ErrorMissingParen;
+    case re2::kRegexpTrailingBackslash:
+      return RE2::ErrorTrailingBackslash;
+    case re2::kRegexpRepeatArgument:
+      return RE2::ErrorRepeatArgument;
+    case re2::kRegexpRepeatSize:
+      return RE2::ErrorRepeatSize;
+    case re2::kRegexpRepeatOp:
+      return RE2::ErrorRepeatOp;
+    case re2::kRegexpBadPerlOp:
+      return RE2::ErrorBadPerlOp;
+    case re2::kRegexpBadUTF8:
+      return RE2::ErrorBadUTF8;
+    case re2::kRegexpBadNamedCapture:
+      return RE2::ErrorBadNamedCapture;
+  }
+  return RE2::ErrorInternal;
+}
+// Returns pattern as a std::string for log messages. Patterns of 100 bytes or
+// more are cut to their first 100 bytes and suffixed with "...".
+static std::string trunc(const StringPiece& pattern) {
+  const size_t kMaxShown = 100;
+  if (pattern.size() >= kMaxShown) {
+    std::string shortened(pattern.substr(0, kMaxShown));
+    return shortened + "...";
+  }
+  return std::string(pattern);
+}
+RE2::RE2(const char* pattern) {  // C-string pattern, DefaultOptions
+  Init(pattern, DefaultOptions);
+}
+RE2::RE2(const std::string& pattern) {  // std::string pattern, DefaultOptions
+  Init(pattern, DefaultOptions);
+}
+RE2::RE2(const StringPiece& pattern) {  // StringPiece pattern, DefaultOptions
+  Init(pattern, DefaultOptions);
+}
+RE2::RE2(const StringPiece& pattern, const Options& options) {  // caller-supplied options
+  Init(pattern, options);
+}
+// Translates this option set into the bitmask of Regexp parse flags.
+// Regexp::ClassNL is always set; every other flag mirrors one option.
+// An encoding that is neither UTF-8 nor Latin-1 adds no flag and is
+// logged when log_errors() is on.
+int RE2::Options::ParseFlags() const {
+  int result = Regexp::ClassNL;
+
+  if (encoding() == RE2::Options::EncodingLatin1) {
+    result |= Regexp::Latin1;
+  } else if (encoding() != RE2::Options::EncodingUTF8) {
+    if (log_errors())
+      LOG(ERROR) << "Unknown encoding " << encoding();
+  }
+
+  // Note the two inverted options: Perl syntax is the default, and
+  // case folding is requested by turning case sensitivity off.
+  if (!posix_syntax())   result |= Regexp::LikePerl;
+  if (literal())         result |= Regexp::Literal;
+  if (never_nl())        result |= Regexp::NeverNL;
+  if (dot_nl())          result |= Regexp::DotNL;
+  if (never_capture())   result |= Regexp::NeverCapture;
+  if (!case_sensitive()) result |= Regexp::FoldCase;
+  if (perl_classes())    result |= Regexp::PerlClasses;
+  if (word_boundary())   result |= Regexp::PerlB;
+  if (one_line())        result |= Regexp::OneLine;
+  return result;
+}
+void RE2::Init(const StringPiece& pattern, const Options& options) {  // parses and compiles pattern; on failure records the error and returns early
+  static std::once_flag empty_once;
+  std::call_once(empty_once, []() {  // allocate the shared empty objects exactly once per process
+    empty_string = new std::string;
+    empty_named_groups = new std::map<std::string, int>;
+    empty_group_names = new std::map<int, std::string>;
+  });
+  pattern_.assign(pattern.data(), pattern.size());  // keep our own copy of the pattern text
+  options_.Copy(options);
+  entire_regexp_ = NULL;
+  error_ = empty_string;  // shared empty string stands for "no error"; ~RE2() does not delete it
+  error_code_ = NoError;
+  error_arg_.clear();
+  prefix_.clear();
+  prefix_foldcase_ = false;
+  suffix_regexp_ = NULL;
+  prog_ = NULL;
+  num_captures_ = -1;
+  is_one_pass_ = false;
+  rprog_ = NULL;          // filled in later by ReverseProg()
+  named_groups_ = NULL;   // filled in later by NamedCapturingGroups()
+  group_names_ = NULL;    // filled in later by CapturingGroupNames()
+  RegexpStatus status;
+  entire_regexp_ = Regexp::Parse(
+    pattern_,
+    static_cast<Regexp::ParseFlags>(options_.ParseFlags()),
+    &status);
+  if (entire_regexp_ == NULL) {  // parse failure: record the error details; prog_ stays NULL
+    if (options_.log_errors()) {
+      LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': "
+                 << status.Text();
+    }
+    error_ = new std::string(status.Text());  // owned by this object; freed in ~RE2()
+    error_code_ = RegexpErrorToRE2(status.code());
+    error_arg_ = std::string(status.error_arg());
+    return;
+  }
+  re2::Regexp* suffix;
+  if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix))
+    suffix_regexp_ = suffix;
+  else
+    suffix_regexp_ = entire_regexp_->Incref();  // no prefix was split off: suffix is the whole regexp; ~RE2() Decrefs both pointers
+  // Two thirds of the memory goes to the forward Prog,
+  // one third to the reverse prog, because the forward
+  // Prog has two DFAs but the reverse prog has one.
+  prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
+  if (prog_ == NULL) {  // compile failure: record the error and leave the object without a program
+    if (options_.log_errors())
+      LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'";
+    error_ = new std::string("pattern too large - compile failed");
+    error_code_ = RE2::ErrorPatternTooLarge;
+    return;
+  }
+  // We used to compute this lazily, but it's used during the
+  // typical control flow for a match call, so we now compute
+  // it eagerly, which avoids the overhead of std::once_flag.
+  num_captures_ = suffix_regexp_->NumCaptures();
+  // Could delay this until the first match call that
+  // cares about submatch information, but the one-pass
+  // machine's memory gets cut from the DFA memory budget,
+  // and that is harder to do if the DFA has already
+  // been built.
+  is_one_pass_ = prog_->IsOnePass();
+}
+// Returns the reverse program, compiling it on first use.
+// The compile runs at most once (guarded by rprog_once_) with one third of
+// the configured memory budget. The result may be NULL if compilation failed.
+re2::Prog* RE2::ReverseProg() const {
+  std::call_once(rprog_once_, [this]() {
+    rprog_ = suffix_regexp_->CompileToReverseProg(options_.max_mem() / 3);
+    if (rprog_ != NULL)
+      return;
+    // Failure is only logged. error_ and error_code_ are deliberately left
+    // alone: a missing reverse Prog is not a showstopper because falling back
+    // to NFA execution is fine, and an RE2 object is meant to be logically
+    // immutable, so ok() must keep returning whatever it returned once
+    // Init() completed.
+    if (options_.log_errors())
+      LOG(ERROR) << "Error reverse compiling '" << trunc(pattern_) << "'";
+  });
+  return rprog_;
+}
+// Releases everything this object owns: the two Regexp references, both
+// compiled programs, the error string and the lazily built capture maps.
+// The process-wide shared empty objects are never deleted.
+RE2::~RE2() {
+  if (suffix_regexp_ != NULL)
+    suffix_regexp_->Decref();
+  if (entire_regexp_ != NULL)
+    entire_regexp_->Decref();
+  delete prog_;
+  delete rprog_;
+  if (error_ != empty_string)
+    delete error_;
+  // Deleting a null pointer is a no-op, so only the shared empty maps need
+  // to be excluded here.
+  if (named_groups_ != empty_named_groups)
+    delete named_groups_;
+  if (group_names_ != empty_group_names)
+    delete group_names_;
+}
+// Returns the size of the forward program, or -1 when there is no program.
+int RE2::ProgramSize() const {
+  return prog_ != NULL ? prog_->size() : -1;
+}
+// Returns the size of the reverse program, or -1 when either the forward
+// program or the reverse program is unavailable.
+int RE2::ReverseProgramSize() const {
+  if (prog_ != NULL) {
+    Prog* reverse = ReverseProg();
+    if (reverse != NULL)
+      return reverse->size();
+  }
+  return -1;
+}
+// Returns the zero-based position of the most significant set bit in n.
+// n must be non-zero (checked with DCHECK_NE).
+static int FindMSBSet(uint32_t n) {
+  DCHECK_NE(n, 0);
+#if defined(__GNUC__)
+  return 31 ^ __builtin_clz(n);
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+  unsigned long position;
+  _BitScanReverse(&position, n);
+  return static_cast<int>(position);
+#else
+  // Portable fallback: binary search using shift widths 16, 8, 4, 2, 1.
+  int position = 0;
+  for (int width = 16; width > 0; width /= 2) {
+    const uint32_t upper = n >> width;
+    if (upper == 0)
+      continue;
+    n = upper;
+    position += width;
+  }
+  return position;
+#endif
+}
+// Builds a histogram of prog's fanout values, bucketed by ceil(log2(value)).
+// Zero fanouts are skipped. If histogram is non-NULL it receives the bucket
+// counts up to and including the highest bucket used. Returns the index of
+// that highest bucket, or -1 when no bucket was used.
+static int Fanout(Prog* prog, std::vector<int>* histogram) {
+  SparseArray<int> fanout(prog->size());
+  prog->Fanout(&fanout);
+  int buckets[32] = {};
+  int used = 0;
+  for (SparseArray<int>::iterator it = fanout.begin(); it != fanout.end(); ++it) {
+    const uint32_t value = it->value();
+    if (value == 0)
+      continue;
+    int bucket = FindMSBSet(value);
+    // Anything that is not an exact power of two rounds up to the next bucket.
+    if ((value & (value - 1)) != 0)
+      ++bucket;
+    ++buckets[bucket];
+    if (used < bucket + 1)
+      used = bucket + 1;
+  }
+  if (histogram != NULL)
+    histogram->assign(buckets, buckets + used);
+  return used - 1;
+}
+// Computes the fanout histogram of the forward program.
+// Returns -1 when there is no program; otherwise whatever Fanout() returns.
+int RE2::ProgramFanout(std::vector<int>* histogram) const {
+  return prog_ != NULL ? Fanout(prog_, histogram) : -1;
+}
+// Computes the fanout histogram of the reverse program.
+// Returns -1 when the forward or the reverse program is unavailable.
+int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
+  if (prog_ != NULL) {
+    Prog* reverse = ReverseProg();
+    if (reverse != NULL)
+      return Fanout(reverse, histogram);
+  }
+  return -1;
+}
+// Returns the name -> index map of capturing groups, building it on first use
+// (guarded by named_groups_once_). Falls back to the shared empty map when
+// there is no regexp or NamedCaptures() yields NULL.
+const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
+  std::call_once(named_groups_once_, [this]() {
+    const std::map<std::string, int>* groups = named_groups_;
+    if (suffix_regexp_ != NULL)
+      groups = suffix_regexp_->NamedCaptures();
+    named_groups_ = (groups != NULL) ? groups : empty_named_groups;
+  });
+  return *named_groups_;
+}
+// Returns the index -> name map of capturing groups, building it on first use
+// (guarded by group_names_once_). Falls back to the shared empty map when
+// there is no regexp or CaptureNames() yields NULL.
+const std::map<int, std::string>& RE2::CapturingGroupNames() const {
+  std::call_once(group_names_once_, [this]() {
+    const std::map<int, std::string>* names = group_names_;
+    if (suffix_regexp_ != NULL)
+      names = suffix_regexp_->CaptureNames();
+    group_names_ = (names != NULL) ? names : empty_group_names;
+  });
+  return *group_names_;
+}
+/***** Convenience interfaces *****/
+// Matches re against text anchored at both ends, storing submatches via args.
+// No consumed-length output is requested.
+bool RE2::FullMatchN(const StringPiece& text, const RE2& re,
+                     const Arg* const args[], int n) {
+  size_t* const no_consumed = NULL;
+  return re.DoMatch(text, ANCHOR_BOTH, no_consumed, args, n);
+}
+// Matches re against text without anchoring, storing submatches via args.
+// No consumed-length output is requested.
+bool RE2::PartialMatchN(const StringPiece& text, const RE2& re,
+                        const Arg* const args[], int n) {
+  size_t* const no_consumed = NULL;
+  return re.DoMatch(text, UNANCHORED, no_consumed, args, n);
+}
+// Matches re at the start of *input. On success the consumed bytes are
+// removed from the front of *input and true is returned; on failure *input
+// is left untouched.
+bool RE2::ConsumeN(StringPiece* input, const RE2& re,
+                   const Arg* const args[], int n) {
+  size_t matched_len;
+  if (!re.DoMatch(*input, ANCHOR_START, &matched_len, args, n))
+    return false;
+  input->remove_prefix(matched_len);
+  return true;
+}
+// Matches re without anchoring against *input. On success the consumed bytes
+// are removed from the front of *input and true is returned; on failure
+// *input is left untouched.
+bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re,
+                          const Arg* const args[], int n) {
+  size_t matched_len;
+  if (!re.DoMatch(*input, UNANCHORED, &matched_len, args, n))
+    return false;
+  input->remove_prefix(matched_len);
+  return true;
+}
+// Replaces the first match of re in *str with the expansion of rewrite.
+// Returns false, leaving *str unchanged, when rewrite refers to more
+// submatches than re has capturing groups or than the local vector can hold,
+// when re does not match, or when Rewrite() fails.
+bool RE2::Replace(std::string* str,
+                  const RE2& re,
+                  const StringPiece& rewrite) {
+  StringPiece groups[kVecSize];
+  const int ngroups = 1 + MaxSubmatch(rewrite);
+  if (ngroups > 1 + re.NumberOfCapturingGroups() ||
+      ngroups > static_cast<int>(arraysize(groups)))
+    return false;
+  if (!re.Match(*str, 0, str->size(), UNANCHORED, groups, ngroups))
+    return false;
+  std::string replacement;
+  if (!re.Rewrite(&replacement, rewrite, groups, ngroups))
+    return false;
+  // groups[0] is the overall match and must lie inside *str.
+  const char* const match_begin = groups[0].data();
+  const size_t match_len = groups[0].size();
+  assert(match_begin >= str->data());
+  assert(match_begin + match_len <= str->data() + str->size());
+  str->replace(match_begin - str->data(), match_len, replacement);
+  return true;
+}
+// Replaces every non-overlapping match of re in *str with the expansion of
+// rewrite and returns the number of replacements made.
+//
+// Returns 0, leaving *str unchanged, when rewrite refers to more submatches
+// than re has capturing groups or than the local vector can hold, or when
+// nothing matched. An empty match directly after the previous match is not
+// replaced; instead one character (one UTF-8 rune when possible, otherwise
+// one byte) is copied through and the scan continues after it.
+int RE2::GlobalReplace(std::string* str,
+                       const RE2& re,
+                       const StringPiece& rewrite) {
+  StringPiece vec[kVecSize];
+  int nvec = 1 + MaxSubmatch(rewrite);
+  // This function returns a count, so the failure value is spelled 0
+  // rather than false.
+  if (nvec > 1 + re.NumberOfCapturingGroups())
+    return 0;
+  if (nvec > static_cast<int>(arraysize(vec)))
+    return 0;
+  const char* p = str->data();
+  const char* ep = p + str->size();
+  const char* lastend = NULL;
+  std::string out;
+  int count = 0;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  // Iterate just once when fuzzing. Otherwise, we easily get bogged down
+  // and coverage is unlikely to improve despite significant expense.
+  while (p == str->data()) {
+#else
+  while (p <= ep) {
+#endif
+    if (!re.Match(*str, static_cast<size_t>(p - str->data()),
+                  str->size(), UNANCHORED, vec, nvec))
+      break;
+    // Copy through the unmatched text that precedes this match.
+    if (p < vec[0].data())
+      out.append(p, vec[0].data() - p);
+    if (vec[0].data() == lastend && vec[0].empty()) {
+      // Disallow empty match at end of last match: skip ahead.
+      //
+      // fullrune() takes int, not ptrdiff_t. However, it just looks
+      // at the leading byte and treats any length >= 4 the same.
+      if (re.options().encoding() == RE2::Options::EncodingUTF8 &&
+          fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) {
+        // re is in UTF-8 mode and there is enough left of str
+        // to allow us to advance by up to UTFmax bytes.
+        Rune r;
+        int n = chartorune(&r, p);
+        // Some copies of chartorune have a bug that accepts
+        // encodings of values in (10FFFF, 1FFFFF] as valid.
+        if (r > Runemax) {
+          n = 1;
+          r = Runeerror;
+        }
+        if (!(n == 1 && r == Runeerror)) {  // no decoding error
+          out.append(p, n);
+          p += n;
+          continue;
+        }
+      }
+      // Most likely, re is in Latin-1 mode. If it is in UTF-8 mode,
+      // we fell through from above and the GIGO principle applies.
+      if (p < ep)
+        out.append(p, 1);
+      p++;
+      continue;
+    }
+    re.Rewrite(&out, rewrite, vec, nvec);
+    p = vec[0].data() + vec[0].size();
+    lastend = p;
+    count++;
+  }
+  // Nothing was replaced: leave *str exactly as it was.
+  if (count == 0)
+    return 0;
+  // Copy through whatever follows the last match, then publish the result.
+  if (p < ep)
+    out.append(p, ep - p);
+  using std::swap;
+  swap(out, *str);
+  return count;
+}
+// Finds the first unanchored match of "re" in "text" and, if there is one,
+// overwrites *out with "rewrite" expanded against that match.  Returns
+// false, leaving *out alone, when the rewrite needs more groups than are
+// available or when nothing matches; otherwise returns what Rewrite() does.
+bool RE2::Extract(const StringPiece& text,
+                  const RE2& re,
+                  const StringPiece& rewrite,
+                  std::string* out) {
+  StringPiece groups[kVecSize];
+  const int ngroups = 1 + MaxSubmatch(rewrite);
+  if (ngroups > 1 + re.NumberOfCapturingGroups() ||
+      ngroups > static_cast<int>(arraysize(groups)))
+    return false;
+  const bool matched =
+      re.Match(text, 0, text.size(), UNANCHORED, groups, ngroups);
+  if (!matched)
+    return false;
+  out->clear();
+  return re.Rewrite(out, rewrite, groups, ngroups);
+}
+// Returns a copy of "unquoted" in which every ASCII byte outside
+// [A-Za-z_0-9] is preceded by a backslash, so that the result matches the
+// input literally.  Bytes with the high bit set are copied unescaped and a
+// NUL byte is written as \x00.
+std::string RE2::QuoteMeta(const StringPiece& unquoted) {
+  std::string result;
+  result.reserve(unquoted.size() << 1);
+  // It is legal to escape a character even if it has no special meaning
+  // in a regular expression, which is why everything that is not a word
+  // character gets a backslash.  (Apart from the NUL special case this is
+  // what the perl function of the same name does; see
+  // `perldoc -f quotemeta`.)
+  for (size_t i = 0; i < unquoted.size(); ++i) {
+    const char c = unquoted[i];
+    if (c == '\0') {
+      // Not strictly required for RE2, but other regexp libraries such as
+      // PCRE need it.  "\\0" cannot be used since the next character
+      // might be a digit.
+      result += "\\x00";
+      continue;
+    }
+    // Explicit range checks on purpose: using 'isalnum' here raises the
+    // benchmark time from 32ns to 58ns.
+    const bool is_word_char = ('a' <= c && c <= 'z') ||
+                              ('A' <= c && c <= 'Z') ||
+                              ('0' <= c && c <= '9') ||
+                              c == '_';
+    // Part of a UTF8 or Latin1 character: must be copied without
+    // escaping.  Experimentally this is what works correctly with the
+    // regexp library.
+    const bool has_high_bit = (c & 128) != 0;
+    if (!is_word_char && !has_high_bit)
+      result += '\\';
+    result += c;
+  }
+  return result;
+}
+// Computes strings *min and *max, each built from at most maxlen bytes of
+// prefix plus program range, bounding the strings this regexp can match.
+// Returns false (with *min and *max cleared) when neither the literal
+// prefix nor the compiled program yields anything useful, and false
+// without touching them when there is no compiled program.
+bool RE2::PossibleMatchRange(std::string* min, std::string* max,
+                             int maxlen) const {
+  if (prog_ == NULL)
+    return false;
+  // Start from (at most maxlen bytes of) the required literal prefix.
+  const int prefix_len = static_cast<int>(prefix_.size());
+  const int n = (prefix_len > maxlen) ? maxlen : prefix_len;
+  *min = prefix_.substr(0, n);
+  *max = prefix_.substr(0, n);
+  if (prefix_foldcase_) {
+    // prefix is ASCII lowercase; the smallest matching string uses the
+    // uppercase letters instead.
+    for (int i = 0; i < n; i++) {
+      char& ch = (*min)[i];
+      if (ch >= 'a' && ch <= 'z')
+        ch += 'A' - 'a';
+    }
+  }
+  // Extend both bounds with whatever the program can tell us.
+  std::string prog_min, prog_max;
+  const int remaining = maxlen - n;
+  if (remaining > 0 &&
+      prog_->PossibleMatchRange(&prog_min, &prog_max, remaining)) {
+    min->append(prog_min);
+    max->append(prog_max);
+    return true;
+  }
+  if (max->empty()) {
+    // Nothing useful.
+    *min = "";
+    *max = "";
+    return false;
+  }
+  // prog_->PossibleMatchRange has failed us, but the prefix still carries
+  // information: round *max up so that it covers any possible suffix.
+  PrefixSuccessor(max);
+  return true;
+}
+// Locale-independent replacement for strcasecmp over the first len bytes.
+// The string a is known to be all lowercase, so only bytes of b are folded
+// (ASCII 'A'..'Z' only).  Returns 0 when equal, otherwise the difference
+// of the first mismatching bytes taken as unsigned values.
+static int ascii_strcasecmp(const char* a, const char* b, size_t len) {
+  for (size_t i = 0; i < len; i++) {
+    const uint8_t lhs = static_cast<uint8_t>(a[i]);
+    uint8_t rhs = static_cast<uint8_t>(b[i]);
+    if (rhs >= 'A' && rhs <= 'Z')
+      rhs = static_cast<uint8_t>(rhs + ('a' - 'A'));
+    if (lhs != rhs)
+      return static_cast<int>(lhs) - static_cast<int>(rhs);
+  }
+  return 0;
+}
+/***** Actual matching and rewriting code *****
+ *
+ * RE2::Match() searches the window text[startpos, endpos) for this regexp.
+ *
+ *   text       the whole input; it is handed to every search engine
+ *              alongside the window that is actually searched
+ *   startpos,
+ *   endpos     byte offsets into text; startpos > endpos or
+ *              endpos > text.size() is logged (if enabled) and yields false
+ *   re_anchor  UNANCHORED, ANCHOR_START or ANCHOR_BOTH; tightened below when
+ *              the program itself is anchored or has a required prefix
+ *   submatch   output array of nsubmatch entries; submatch[0] receives the
+ *              overall match and entries beyond the regexp's own groups
+ *              are reset to empty StringPieces
+ *
+ * Returns true iff a match was found.  The DFA is normally tried first to
+ * decide whether and where there is a match; OnePass, BitState or the NFA
+ * then runs when submatches are wanted or when the DFA was skipped or
+ * reported failure.
+ */
+bool RE2::Match(const StringPiece& text,
+                size_t startpos,
+                size_t endpos,
+                Anchor re_anchor,
+                StringPiece* submatch,
+                int nsubmatch) const {
+  if (!ok()) {
+    if (options_.log_errors())
+      LOG(ERROR) << "Invalid RE2: " << *error_;
+    return false;
+  }
+  if (startpos > endpos || endpos > text.size()) {
+    if (options_.log_errors())
+      LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
+                 << "startpos: " << startpos << ", "
+                 << "endpos: " << endpos << ", "
+                 << "text size: " << text.size() << "]";
+    return false;
+  }
+  StringPiece subtext = text;  // becomes the window text[startpos, endpos)
+  subtext.remove_prefix(startpos);
+  subtext.remove_suffix(text.size() - endpos);
+  // Use DFAs to find exact location of match, filter out non-matches.
+  // Don't ask for the location if we won't use it.
+  // SearchDFA can do extra optimizations in that case.
+  StringPiece match;
+  StringPiece* matchp = &match;
+  if (nsubmatch == 0)
+    matchp = NULL;
+  int ncap = 1 + NumberOfCapturingGroups();  // entries to fill in, capped at nsubmatch
+  if (ncap > nsubmatch)
+    ncap = nsubmatch;
+  // If the regexp is anchored explicitly, must not be in middle of text.
+  if (prog_->anchor_start() && startpos != 0)
+    return false;
+  if (prog_->anchor_end() && endpos != text.size())
+    return false;
+  // If the regexp is anchored explicitly, update re_anchor
+  // so that we can potentially fall into a faster case below.
+  if (prog_->anchor_start() && prog_->anchor_end())
+    re_anchor = ANCHOR_BOTH;
+  else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH)
+    re_anchor = ANCHOR_START;
+  // Check for the required prefix, if any.
+  size_t prefixlen = 0;
+  if (!prefix_.empty()) {
+    if (startpos != 0)
+      return false;
+    prefixlen = prefix_.size();
+    if (prefixlen > subtext.size())
+      return false;
+    if (prefix_foldcase_) {
+      if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0)
+        return false;
+    } else {
+      if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0)
+        return false;
+    }
+    subtext.remove_prefix(prefixlen);  // the engines only see what follows the prefix
+    // If there is a required prefix, the anchor must be at least ANCHOR_START.
+    if (re_anchor != ANCHOR_BOTH)
+      re_anchor = ANCHOR_START;
+  }
+  Prog::Anchor anchor = Prog::kUnanchored;
+  Prog::MatchKind kind = Prog::kFirstMatch;
+  if (options_.longest_match())
+    kind = Prog::kLongestMatch;
+  bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture);
+  // BitState allocates a bitmap of size prog_->list_count() * text.size().
+  // It also allocates a stack of 3-word structures which could potentially
+  // grow as large as prog_->list_count() * text.size(), but in practice is
+  // much smaller.
+  const int kMaxBitStateBitmapSize = 256*1024;  // bitmap size <= max (bits)
+  bool can_bit_state = prog_->CanBitState();
+  size_t bit_state_text_max = kMaxBitStateBitmapSize / prog_->list_count();  // longest text for which the bitmap stays within the limit
+#ifdef RE2_HAVE_THREAD_LOCAL
+  hooks::context = this;
+#endif
+  bool dfa_failed = false;  // out-parameter of SearchDFA; reported below as "DFA out of memory"
+  bool skipped_test = false;  // true when there is no DFA result for the second stage to rely on
+  switch (re_anchor) {
+    default:
+      LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
+      return false;
+    case UNANCHORED: {
+      if (prog_->anchor_end()) {
+        // This is a very special case: we don't need the forward DFA because
+        // we already know where the match must end! Instead, the reverse DFA
+        // can say whether there is a match and (optionally) where it starts.
+        Prog* prog = ReverseProg();
+        if (prog == NULL) {
+          // Fall back to NFA below.
+          skipped_test = true;
+          break;
+        }
+        if (!prog->SearchDFA(subtext, text, Prog::kAnchored,
+                             Prog::kLongestMatch, matchp, &dfa_failed, NULL)) {
+          if (dfa_failed) {
+            if (options_.log_errors())
+              LOG(ERROR) << "DFA out of memory: "
+                         << "pattern length " << pattern_.size() << ", "
+                         << "program size " << prog->size() << ", "
+                         << "list count " << prog->list_count() << ", "
+                         << "bytemap range " << prog->bytemap_range();
+            // Fall back to NFA below.
+            skipped_test = true;
+            break;
+          }
+          return false;
+        }
+        if (matchp == NULL)  // Matched.  Don't care where.
+          return true;
+        break;
+      }
+      if (!prog_->SearchDFA(subtext, text, anchor, kind,
+                            matchp, &dfa_failed, NULL)) {
+        if (dfa_failed) {
+          if (options_.log_errors())
+            LOG(ERROR) << "DFA out of memory: "
+                       << "pattern length " << pattern_.size() << ", "
+                       << "program size " << prog_->size() << ", "
+                       << "list count " << prog_->list_count() << ", "
+                       << "bytemap range " << prog_->bytemap_range();
+          // Fall back to NFA below.
+          skipped_test = true;
+          break;
+        }
+        return false;
+      }
+      if (matchp == NULL)  // Matched.  Don't care where.
+        return true;
+      // SearchDFA set match.end() but didn't know where the
+      // match started.  Run the regexp backward from match.end()
+      // to find the longest possible match -- that's where it started.
+      Prog* prog = ReverseProg();
+      if (prog == NULL) {
+        // Fall back to NFA below.
+        skipped_test = true;
+        break;
+      }
+      if (!prog->SearchDFA(match, text, Prog::kAnchored,
+                           Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
+        if (dfa_failed) {
+          if (options_.log_errors())
+            LOG(ERROR) << "DFA out of memory: "
+                       << "pattern length " << pattern_.size() << ", "
+                       << "program size " << prog->size() << ", "
+                       << "list count " << prog->list_count() << ", "
+                       << "bytemap range " << prog->bytemap_range();
+          // Fall back to NFA below.
+          skipped_test = true;
+          break;
+        }
+        // The forward search matched, so the reverse search failing without
+        // dfa_failed set contradicts it.
+        if (options_.log_errors())
+          LOG(ERROR) << "SearchDFA inconsistency";
+        return false;
+      }
+      break;
+    }
+    case ANCHOR_BOTH:
+    case ANCHOR_START:
+      if (re_anchor == ANCHOR_BOTH)
+        kind = Prog::kFullMatch;
+      anchor = Prog::kAnchored;
+      // If only a small amount of text and need submatch
+      // information anyway and we're going to use OnePass or BitState
+      // to get it, we might as well not even bother with the DFA:
+      // OnePass or BitState will be fast enough.
+      // On tiny texts, OnePass outruns even the DFA, and
+      // it doesn't have the shared state and occasional mutex that
+      // the DFA does.
+      // NOTE(review): these two size checks look at text.size(), the whole
+      // input, rather than subtext.size() -- confirm that is intended.
+      if (can_one_pass && text.size() <= 4096 &&
+          (ncap > 1 || text.size() <= 8)) {
+        skipped_test = true;
+        break;
+      }
+      if (can_bit_state && text.size() <= bit_state_text_max && ncap > 1) {
+        skipped_test = true;
+        break;
+      }
+      if (!prog_->SearchDFA(subtext, text, anchor, kind,
+                            &match, &dfa_failed, NULL)) {
+        if (dfa_failed) {
+          if (options_.log_errors())
+            LOG(ERROR) << "DFA out of memory: "
+                       << "pattern length " << pattern_.size() << ", "
+                       << "program size " << prog_->size() << ", "
+                       << "list count " << prog_->list_count() << ", "
+                       << "bytemap range " << prog_->bytemap_range();
+          // Fall back to NFA below.
+          skipped_test = true;
+          break;
+        }
+        return false;
+      }
+      break;
+  }
+  if (!skipped_test && ncap <= 1) {
+    // We know exactly where it matches.  That's enough.
+    if (ncap == 1)
+      submatch[0] = match;
+  } else {
+    StringPiece subtext1;  // what the second-stage engine will search
+    if (skipped_test) {
+      // DFA ran out of memory or was skipped:
+      // need to search in entire original text.
+      subtext1 = subtext;
+    } else {
+      // DFA found the exact match location:
+      // let NFA run an anchored, full match search
+      // to find submatch locations.
+      subtext1 = match;
+      anchor = Prog::kAnchored;
+      kind = Prog::kFullMatch;
+    }
+    // In each branch below a failure is only logged as an inconsistency
+    // when the DFA had already reported a match (skipped_test is false).
+    if (can_one_pass && anchor != Prog::kUnanchored) {
+      if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
+        if (!skipped_test && options_.log_errors())
+          LOG(ERROR) << "SearchOnePass inconsistency";
+        return false;
+      }
+    } else if (can_bit_state && subtext1.size() <= bit_state_text_max) {
+      if (!prog_->SearchBitState(subtext1, text, anchor,
+                                 kind, submatch, ncap)) {
+        if (!skipped_test && options_.log_errors())
+          LOG(ERROR) << "SearchBitState inconsistency";
+        return false;
+      }
+    } else {
+      if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
+        if (!skipped_test && options_.log_errors())
+          LOG(ERROR) << "SearchNFA inconsistency";
+        return false;
+      }
+    }
+  }
+  // Adjust overall match for required prefix that we stripped off.
+  if (prefixlen > 0 && nsubmatch > 0)
+    submatch[0] = StringPiece(submatch[0].data() - prefixlen,
+                              submatch[0].size() + prefixlen);
+  // Zero submatches that don't exist in the regexp.
+  for (int i = ncap; i < nsubmatch; i++)
+    submatch[i] = StringPiece();
+  return true;
+}
+// Internal matcher - like Match(), but instead of handing back
+// StringPieces it feeds submatch i+1 to args[i]->Parse().  When consumed
+// is non-NULL it receives the offset in text at which the overall match
+// ends.  Returns false if the regexp is invalid, has fewer capturing
+// groups than n, does not match, or if any Parse() call fails.
+bool RE2::DoMatch(const StringPiece& text,
+                  Anchor re_anchor,
+                  size_t* consumed,
+                  const Arg* const* args,
+                  int n) const {
+  if (!ok()) {
+    if (options_.log_errors())
+      LOG(ERROR) << "Invalid RE2: " << *error_;
+    return false;
+  }
+  // RE has fewer capturing groups than number of Arg pointers passed in.
+  if (n > NumberOfCapturingGroups())
+    return false;
+  // Slot 0 is the overall match; nothing at all is needed when the caller
+  // wants neither submatches nor the consumed length.
+  const int nvec = (n == 0 && consumed == NULL) ? 0 : n + 1;
+  // Small requests live on the stack, larger ones on the heap.
+  StringPiece stkvec[kVecSize];
+  StringPiece* heapvec = NULL;
+  StringPiece* vec = stkvec;
+  if (nvec > static_cast<int>(arraysize(stkvec))) {
+    heapvec = new StringPiece[nvec];
+    vec = heapvec;
+  }
+  bool success = Match(text, 0, text.size(), re_anchor, vec, nvec);
+  if (success) {
+    if (consumed != NULL)
+      *consumed = static_cast<size_t>(vec[0].end() - text.begin());
+    // With n == 0 or no args the caller is not interested in results.
+    if (n != 0 && args != NULL) {
+      // If we got here, we must have matched the whole pattern.
+      for (int i = 0; i < n && success; i++) {
+        const StringPiece& piece = vec[i + 1];
+        // TODO: Should we indicate what the error was?
+        success = args[i]->Parse(piece.data(), piece.size());
+      }
+    }
+  }
+  // Single exit: deleting a NULL heapvec is a no-op.
+  delete[] heapvec;
+  return success;
+}
+// Checks that the rewrite string is well-formed with respect to this
+// regular expression: every backslash must be followed by a digit or
+// another backslash, and no \N may refer to a group beyond
+// NumberOfCapturingGroups().  On failure returns false and stores a
+// description in *error (which must not be NULL); on success *error is
+// left untouched.
+bool RE2::CheckRewriteString(const StringPiece& rewrite,
+                             std::string* error) const {
+  int max_token = -1;
+  for (const char *s = rewrite.data(), *end = s + rewrite.size();
+       s < end; s++) {
+    if (*s != '\\') {
+      continue;
+    }
+    if (++s == end) {
+      *error = "Rewrite schema error: '\\' not allowed at end.";
+      return false;
+    }
+    const char c = *s;
+    if (c == '\\') {
+      continue;
+    }
+    // Explicit range check rather than isdigit(): passing a negative char
+    // value (any byte >= 0x80 where char is signed) to isdigit() is
+    // undefined behavior.
+    if (c < '0' || c > '9') {
+      *error = "Rewrite schema error: "
+               "'\\' must be followed by a digit or '\\'.";
+      return false;
+    }
+    const int n = (c - '0');
+    if (max_token < n) {
+      max_token = n;
+    }
+  }
+  if (max_token > NumberOfCapturingGroups()) {
+    *error = StringPrintf(
+        "Rewrite schema requests %d matches, but the regexp only has %d "
+        "parenthesized subexpressions.",
+        max_token, NumberOfCapturingGroups());
+    return false;
+  }
+  return true;
+}
+// Returns the maximum submatch needed for the rewrite to be done by Replace().
+// E.g. if rewrite == "foo \\2,\\1", returns 2.
+// Returns 0 when the rewrite contains no \N reference at all.
+int RE2::MaxSubmatch(const StringPiece& rewrite) {
+  int max = 0;
+  for (const char *s = rewrite.data(), *end = s + rewrite.size();
+       s < end; s++) {
+    if (*s != '\\')
+      continue;
+    s++;
+    // A trailing backslash has nothing after it; stop here so that the
+    // loop increment does not step s beyond one-past-the-end.
+    if (s == end)
+      break;
+    // Explicit range check rather than isdigit(): passing a negative char
+    // value (any byte >= 0x80 where char is signed) to isdigit() is
+    // undefined behavior.
+    const char c = *s;
+    if ('0' <= c && c <= '9') {
+      const int n = (c - '0');
+      if (n > max)
+        max = n;
+    }
+  }
+  return max;
+}
+// Append the "rewrite" string, with backslash substitutions from "vec",
+// to string "out".  \N is replaced by vec[N] and \\ by a single backslash.
+// Returns false (logging if enabled) when \N refers to an entry at or
+// beyond veclen, or when a backslash is followed by anything else or by
+// nothing; in that case "out" may already hold a partial expansion.
+bool RE2::Rewrite(std::string* out,
+                  const StringPiece& rewrite,
+                  const StringPiece* vec,
+                  int veclen) const {
+  for (const char *s = rewrite.data(), *end = s + rewrite.size();
+       s < end; s++) {
+    if (*s != '\\') {
+      out->push_back(*s);
+      continue;
+    }
+    s++;
+    const bool at_end = !(s < end);
+    // Explicit range check rather than isdigit(): passing a negative char
+    // value (any byte >= 0x80 where char is signed) to isdigit() is
+    // undefined behavior.
+    if (!at_end && '0' <= *s && *s <= '9') {
+      int n = (*s - '0');
+      if (n >= veclen) {
+        if (options_.log_errors()) {
+          LOG(ERROR) << "invalid substitution \\" << n
+                     << " from " << veclen << " groups";
+        }
+        return false;
+      }
+      StringPiece snip = vec[n];
+      if (!snip.empty())
+        out->append(snip.data(), snip.size());
+    } else if (!at_end && *s == '\\') {
+      out->push_back('\\');
+    } else {
+      // A StringPiece is not NUL-terminated, so copy exactly size() bytes
+      // for the log message instead of streaming data() as a C string.
+      if (options_.log_errors())
+        LOG(ERROR) << "invalid rewrite pattern: "
+                   << std::string(rewrite.data(), rewrite.size());
+      return false;
+    }
+  }
+  return true;
+}
+/***** Parsers for various types *****/
+// Parser used when there is nowhere to store a value: succeeds only if
+// dest is NULL.  str and n are not examined.
+bool RE2::Arg::parse_null(const char* str, size_t n, void* dest) {
+  // Somebody asking us to store into a non-NULL void* pointer is an error.
+  if (dest != NULL)
+    return false;
+  return true;
+}
+// Copies the n bytes at str into the std::string that dest points to.
+// A NULL dest means the value is not wanted.  Always succeeds.
+bool RE2::Arg::parse_string(const char* str, size_t n, void* dest) {
+  if (dest != NULL) {
+    std::string* target = static_cast<std::string*>(dest);
+    target->assign(str, n);
+  }
+  return true;
+}
+// Points the StringPiece that dest refers to at the n bytes starting at
+// str (no copy is made).  A NULL dest means the value is not wanted.
+// Always succeeds.
+bool RE2::Arg::parse_stringpiece(const char* str, size_t n, void* dest) {
+  if (dest != NULL) {
+    StringPiece* target = static_cast<StringPiece*>(dest);
+    *target = StringPiece(str, n);
+  }
+  return true;
+}
+// Accepts exactly one byte and stores it into the char that dest points
+// to (if dest is non-NULL).  Any other length is a parse failure.
+bool RE2::Arg::parse_char(const char* str, size_t n, void* dest) {
+  const bool single_byte = (n == 1);
+  if (!single_byte)
+    return false;
+  if (dest != NULL)
+    *static_cast<char*>(dest) = str[0];
+  return true;
+}
+// Accepts exactly one byte and stores it into the signed char that dest
+// points to (if dest is non-NULL).  Any other length is a parse failure.
+bool RE2::Arg::parse_schar(const char* str, size_t n, void* dest) {
+  const bool single_byte = (n == 1);
+  if (!single_byte)
+    return false;
+  if (dest != NULL)
+    *static_cast<signed char*>(dest) = str[0];
+  return true;
+}
+// Accepts exactly one byte and stores it into the unsigned char that dest
+// points to (if dest is non-NULL).  Any other length is a parse failure.
+bool RE2::Arg::parse_uchar(const char* str, size_t n, void* dest) {
+  const bool single_byte = (n == 1);
+  if (!single_byte)
+    return false;
+  if (dest != NULL)
+    *static_cast<unsigned char*>(dest) = str[0];
+  return true;
+}
+// Largest number spec that we are willing to parse
+static const int kMaxNumberLength = 32;
+// REQUIRES "buf" must have length at least nbuf.
+// Copies "str" into "buf" and null-terminates.
+// Overwrites *np with the new length.
+//
+// When the input cannot be a number (it is empty, starts with a space
+// that is not accepted, consists only of spaces, or does not fit in buf)
+// the empty string is returned and *np is left at its original value, so
+// that the caller's "parsed exactly n bytes" check fails.
+static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
+                                   size_t* np, bool accept_spaces) {
+  size_t n = *np;
+  if (n == 0) return "";
+  // isspace() has undefined behavior for negative values, which is what a
+  // byte >= 0x80 becomes where char is signed; go through unsigned char.
+  if (isspace(static_cast<unsigned char>(*str))) {
+    // We are less forgiving than the strtoxxx() routines and do not
+    // allow leading spaces. We do allow leading spaces for floats.
+    if (!accept_spaces) {
+      return "";
+    }
+    while (n > 0 && isspace(static_cast<unsigned char>(*str))) {
+      n--;
+      str++;
+    }
+    // Nothing but spaces is not a number.  Without this check *np would
+    // become 0 and the caller would accept the empty string as the value 0.
+    if (n == 0) return "";
+  }
+  // Although buf has a fixed maximum size, we can still handle
+  // arbitrarily large integers correctly by omitting leading zeros.
+  // (Numbers that are still too long will be out of range.)
+  // Before deciding whether str is too long,
+  // remove leading zeros with s/000+/00/.
+  // Leaving the leading two zeros in place means that
+  // we don't change 0000x123 (invalid) into 0x123 (valid).
+  // Skip over leading - before replacing.
+  bool neg = false;
+  if (n >= 1 && str[0] == '-') {
+    neg = true;
+    n--;
+    str++;
+  }
+  if (n >= 3 && str[0] == '0' && str[1] == '0') {
+    while (n >= 3 && str[2] == '0') {
+      n--;
+      str++;
+    }
+  }
+  if (neg) {  // make room in buf for -
+    n++;
+    str--;
+  }
+  if (n > nbuf-1) return "";
+  memmove(buf, str, n);
+  if (neg) {
+    // str[0] is the '-' itself or one of the skipped zeros; either way the
+    // first byte of the copy must be the sign.
+    buf[0] = '-';
+  }
+  buf[n] = '\0';
+  *np = n;
+  return buf;
+}
+// Parses the n bytes at str as a long in the given radix using strtol().
+// Fails on empty input, on trailing junk and when strtol() sets errno.
+// The value is stored through dest unless dest is NULL.
+bool RE2::Arg::parse_long_radix(const char* str,
+                                size_t n,
+                                void* dest,
+                                int radix) {
+  if (n == 0) return false;
+  char buf[kMaxNumberLength+1];
+  const char* digits = TerminateNumber(buf, sizeof buf, str, &n, false);
+  char* stop;
+  errno = 0;
+  const long value = strtol(digits, &stop, radix);
+  const bool consumed_all = (stop == digits + n);  // else: leftover junk
+  if (!consumed_all || errno)
+    return false;
+  if (dest != NULL)
+    *static_cast<long*>(dest) = value;
+  return true;
+}
+// Parses the n bytes at str as an unsigned long in the given radix using
+// strtoul().  Fails on empty input, on a leading '-', on trailing junk and
+// when strtoul() sets errno.  The value is stored through dest unless dest
+// is NULL.
+bool RE2::Arg::parse_ulong_radix(const char* str,
+                                 size_t n,
+                                 void* dest,
+                                 int radix) {
+  if (n == 0) return false;
+  char buf[kMaxNumberLength+1];
+  const char* digits = TerminateNumber(buf, sizeof buf, str, &n, false);
+  // strtoul() will silently accept negative numbers and parse
+  // them.  This module is more strict and treats them as errors.
+  const bool negative = (digits[0] == '-');
+  if (negative)
+    return false;
+  char* stop;
+  errno = 0;
+  const unsigned long value = strtoul(digits, &stop, radix);
+  const bool consumed_all = (stop == digits + n);  // else: leftover junk
+  if (!consumed_all || errno)
+    return false;
+  if (dest != NULL)
+    *static_cast<unsigned long*>(dest) = value;
+  return true;
+}
+// Parses via parse_long_radix() and then rejects values that do not
+// survive a round trip through short.  Stores through dest unless NULL.
+bool RE2::Arg::parse_short_radix(const char* str,
+                                 size_t n,
+                                 void* dest,
+                                 int radix) {
+  long wide;
+  const bool parsed = parse_long_radix(str, n, &wide, radix);
+  if (!parsed)
+    return false;  // Could not parse
+  const short narrow = static_cast<short>(wide);
+  if (narrow != wide)
+    return false;  // Out of range
+  if (dest != NULL)
+    *static_cast<short*>(dest) = narrow;
+  return true;
+}
+// Parses via parse_ulong_radix() and then rejects values that do not
+// survive a round trip through unsigned short.  Stores through dest
+// unless NULL.
+bool RE2::Arg::parse_ushort_radix(const char* str,
+                                  size_t n,
+                                  void* dest,
+                                  int radix) {
+  unsigned long wide;
+  const bool parsed = parse_ulong_radix(str, n, &wide, radix);
+  if (!parsed)
+    return false;  // Could not parse
+  const unsigned short narrow = static_cast<unsigned short>(wide);
+  if (narrow != wide)
+    return false;  // Out of range
+  if (dest != NULL)
+    *static_cast<unsigned short*>(dest) = narrow;
+  return true;
+}
+// Parses via parse_long_radix() and then rejects values that do not
+// survive a round trip through int.  Stores through dest unless NULL.
+bool RE2::Arg::parse_int_radix(const char* str,
+                               size_t n,
+                               void* dest,
+                               int radix) {
+  long wide;
+  const bool parsed = parse_long_radix(str, n, &wide, radix);
+  if (!parsed)
+    return false;  // Could not parse
+  const int narrow = static_cast<int>(wide);
+  if (narrow != wide)
+    return false;  // Out of range
+  if (dest != NULL)
+    *static_cast<int*>(dest) = narrow;
+  return true;
+}
+// Parses via parse_ulong_radix() and then rejects values that do not
+// survive a round trip through unsigned int.  Stores through dest unless
+// NULL.
+bool RE2::Arg::parse_uint_radix(const char* str,
+                                size_t n,
+                                void* dest,
+                                int radix) {
+  unsigned long wide;
+  const bool parsed = parse_ulong_radix(str, n, &wide, radix);
+  if (!parsed)
+    return false;  // Could not parse
+  const unsigned int narrow = static_cast<unsigned int>(wide);
+  if (narrow != wide)
+    return false;  // Out of range
+  if (dest != NULL)
+    *static_cast<unsigned int*>(dest) = narrow;
+  return true;
+}
+// Parses the n bytes at str as a long long in the given radix using
+// strtoll().  Fails on empty input, on trailing junk and when strtoll()
+// sets errno.  The value is stored through dest unless dest is NULL.
+bool RE2::Arg::parse_longlong_radix(const char* str,
+                                    size_t n,
+                                    void* dest,
+                                    int radix) {
+  if (n == 0) return false;
+  char buf[kMaxNumberLength+1];
+  const char* digits = TerminateNumber(buf, sizeof buf, str, &n, false);
+  char* stop;
+  errno = 0;
+  const long long value = strtoll(digits, &stop, radix);
+  const bool consumed_all = (stop == digits + n);  // else: leftover junk
+  if (!consumed_all || errno)
+    return false;
+  if (dest != NULL)
+    *static_cast<long long*>(dest) = value;
+  return true;
+}
+// Parses the n bytes at str as an unsigned long long in the given radix
+// using strtoull().  Fails on empty input, on a leading '-', on trailing
+// junk and when strtoull() sets errno.  The value is stored through dest
+// unless dest is NULL.
+bool RE2::Arg::parse_ulonglong_radix(const char* str,
+                                     size_t n,
+                                     void* dest,
+                                     int radix) {
+  if (n == 0) return false;
+  char buf[kMaxNumberLength+1];
+  const char* digits = TerminateNumber(buf, sizeof buf, str, &n, false);
+  // strtoull() will silently accept negative numbers and parse
+  // them.  This module is more strict and treats them as errors.
+  const bool negative = (digits[0] == '-');
+  if (negative)
+    return false;
+  char* stop;
+  errno = 0;
+  const unsigned long long value = strtoull(digits, &stop, radix);
+  const bool consumed_all = (stop == digits + n);  // else: leftover junk
+  if (!consumed_all || errno)
+    return false;
+  if (dest != NULL)
+    *static_cast<unsigned long long*>(dest) = value;
+  return true;
+}
+// Shared implementation of the float and double parsers.  Parses the n
+// bytes at str with strtof() when isfloat is true and strtod() otherwise
+// (leading spaces are accepted).  Fails on empty input, on trailing junk
+// and when the conversion sets errno.  The value is stored through dest,
+// as float or double according to isfloat, unless dest is NULL.
+static bool parse_double_float(const char* str, size_t n, bool isfloat,
+                               void* dest) {
+  if (n == 0) return false;
+  static const int kMaxLength = 200;
+  char buf[kMaxLength+1];
+  const char* text = TerminateNumber(buf, sizeof buf, str, &n, true);
+  char* stop;
+  errno = 0;
+  const double value = isfloat ? strtof(text, &stop) : strtod(text, &stop);
+  const bool consumed_all = (stop == text + n);  // else: leftover junk
+  if (!consumed_all || errno)
+    return false;
+  if (dest == NULL)
+    return true;
+  if (isfloat)
+    *static_cast<float*>(dest) = static_cast<float>(value);
+  else
+    *static_cast<double*>(dest) = value;
+  return true;
+}
+// Arg parser for double destinations; forwards to parse_double_float().
+bool RE2::Arg::parse_double(const char* str, size_t n, void* dest) {
+  const bool as_float = false;
+  return parse_double_float(str, n, as_float, dest);
+}
+// Arg parser for float destinations; forwards to parse_double_float().
+bool RE2::Arg::parse_float(const char* str, size_t n, void* dest) {
+  const bool as_float = true;
+  return parse_double_float(str, n, as_float, dest);
+}
+// Defines one RE2::Arg parser "fn" that forwards to "radix_fn" with a
+// fixed radix.
+#define DEFINE_ONE_RADIX_PARSER(fn, radix_fn, radix)                       \
+  bool RE2::Arg::fn(const char* str, size_t n, void* dest) {               \
+    return radix_fn(str, n, dest, radix);                                  \
+  }
+// For one integer type, defines parse_<type> (radix 10), parse_<type>_hex
+// (radix 16), parse_<type>_octal (radix 8) and parse_<type>_cradix
+// (radix 0), all of which forward to parse_<type>_radix.
+#define DEFINE_INTEGER_PARSER(type)                                        \
+  DEFINE_ONE_RADIX_PARSER(parse_##type, parse_##type##_radix, 10)          \
+  DEFINE_ONE_RADIX_PARSER(parse_##type##_hex, parse_##type##_radix, 16)    \
+  DEFINE_ONE_RADIX_PARSER(parse_##type##_octal, parse_##type##_radix, 8)   \
+  DEFINE_ONE_RADIX_PARSER(parse_##type##_cradix, parse_##type##_radix, 0)
+DEFINE_INTEGER_PARSER(short)
+DEFINE_INTEGER_PARSER(ushort)
+DEFINE_INTEGER_PARSER(int)
+DEFINE_INTEGER_PARSER(uint)
+DEFINE_INTEGER_PARSER(long)
+DEFINE_INTEGER_PARSER(ulong)
+DEFINE_INTEGER_PARSER(longlong)
+DEFINE_INTEGER_PARSER(ulonglong)
+#undef DEFINE_INTEGER_PARSER
+#undef DEFINE_ONE_RADIX_PARSER
+namespace hooks {  // Storage, setters and getters for the callback hooks defined below.
+#ifdef RE2_HAVE_THREAD_LOCAL
+thread_local const RE2* context = NULL;  // Per-thread; RE2::Match() stores its own RE2 here before searching.
+#endif
+template <typename T>
+union Hook {  // Holds one callback pointer of type T*, accessed through an atomic.
+  void Store(T* cb) { cb_.store(cb, std::memory_order_release); }  // Replaces the callback (release store).
+  T* Load() const { return cb_.load(std::memory_order_acquire); }  // Returns the current callback (acquire load).
+#if !defined(__clang__) && defined(_MSC_VER)
+  // Citing https://github.com/protocolbuffers/protobuf/pull/4777 as precedent,
+  // this is a gross hack to make std::atomic<T*> constant-initialized on MSVC.
+  static_assert(ATOMIC_POINTER_LOCK_FREE == 2,
+                "std::atomic<T*> must be always lock-free");
+  T* cb_for_constinit_;  // Declared before cb_, so on MSVC this is the member the brace initializer in DEFINE_HOOK sets.
+#endif
+  std::atomic<T*> cb_;  // The member that Store() and Load() operate on.
+};
+template <typename T>
+static void DoNothing(const T&) {}  // Initial value of every hook: ignores its argument.
+#define DEFINE_HOOK(type, name)                                       \
+  static Hook<type##Callback> name##_hook = {{&DoNothing<type>}};     \
+  void Set##type##Hook(type##Callback* cb) { name##_hook.Store(cb); } \
+  type##Callback* Get##type##Hook() { return name##_hook.Load(); }
+DEFINE_HOOK(DFAStateCacheReset, dfa_state_cache_reset)  // Defines dfa_state_cache_reset_hook, SetDFAStateCacheResetHook() and GetDFAStateCacheResetHook().
+DEFINE_HOOK(DFASearchFailure, dfa_search_failure)  // Defines dfa_search_failure_hook, SetDFASearchFailureHook() and GetDFASearchFailureHook().
+#undef DEFINE_HOOK
+}  // namespace hooks
+}  // namespace re2