npm - re2 - Versions diffs - 1.16.0 → 1.17.0 - Mend

re2 1.16.0 → 1.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43)

package/.github/actions/{linux-alpine-node-15 → linux-alpine-node-17}/Dockerfile +1 -1
package/.github/actions/linux-alpine-node-17/action.yml +7 -0
package/.github/actions/{linux-alpine-node-15 → linux-alpine-node-17}/entrypoint.sh +0 -0
package/.github/dependabot.yml +1 -1
package/.github/workflows/build.yml +8 -8
package/.github/workflows/tests.yml +1 -1
package/README.md +1 -0
package/package.json +3 -3
package/tests/test_general.js +6 -0
package/vendor/README +3 -1
package/vendor/re2/bitstate.cc +3 -3
package/vendor/re2/compile.cc +50 -34
package/vendor/re2/dfa.cc +24 -21
package/vendor/re2/fuzzing/re2_fuzzer.cc +96 -20
package/vendor/re2/make_perl_groups.pl +1 -1
package/vendor/re2/nfa.cc +5 -5
package/vendor/re2/onepass.cc +2 -2
package/vendor/re2/parse.cc +41 -22
package/vendor/re2/perl_groups.cc +34 -34
package/vendor/re2/prog.cc +188 -4
package/vendor/re2/prog.h +45 -13
package/vendor/re2/re2.cc +7 -12
package/vendor/re2/re2.h +7 -3
package/vendor/re2/regexp.cc +11 -5
package/vendor/re2/regexp.h +7 -2
package/vendor/re2/set.cc +3 -0
package/vendor/re2/testing/backtrack.cc +3 -3
package/vendor/re2/testing/compile_test.cc +45 -21
package/vendor/re2/testing/dfa_test.cc +4 -4
package/vendor/re2/testing/exhaustive_tester.cc +2 -2
package/vendor/re2/testing/parse_test.cc +1 -0
package/vendor/re2/testing/re2_test.cc +31 -16
package/vendor/re2/testing/regexp_benchmark.cc +108 -121
package/vendor/re2/testing/required_prefix_test.cc +78 -24
package/vendor/re2/testing/search_test.cc +2 -0
package/vendor/re2/testing/tester.cc +9 -9
package/vendor/re2/tostring.cc +1 -1
package/vendor/re2/unicode.py +1 -1
package/vendor/re2/unicode_casefold.cc +25 -11
package/vendor/re2/unicode_groups.cc +319 -151
package/vendor/re2/walker-inl.h +3 -2
package/vendor/util/mutex.h +2 -2
package/.github/actions/linux-alpine-node-15/action.yml +0 -7

package/.github/actions/{linux-alpine-node-15 → linux-alpine-node-17}/Dockerfile RENAMED Viewed

@@ -1,4 +1,4 @@
-FROM node:15-alpine
+FROM node:17-alpine
 RUN apk add --no-cache python3 make gcc g++

package/.github/actions/linux-alpine-node-17/action.yml ADDED Viewed

@@ -0,0 +1,7 @@
+name: 'Create a binary artifact for Node 17 on Alpine Linux'
+description: 'Create a binary artifact for Node 17 on Alpine Linux using musl'
+runs:
+  using: 'docker'
+  image: 'Dockerfile'
+  args:
+    - ${{inputs.node-version}}

package/.github/actions/{linux-alpine-node-15 → linux-alpine-node-17}/entrypoint.sh RENAMED Viewed

File without changes

package/.github/dependabot.yml CHANGED Viewed

@@ -5,7 +5,7 @@
 version: 2
 updates:
-  - package-ecosystem: "nvm" # See documentation for possible values
+  - package-ecosystem: "npm" # See documentation for possible values
     directory: "/" # Location of package manifests
     schedule:
       interval: "weekly"

package/.github/workflows/build.yml CHANGED Viewed

@@ -31,7 +31,7 @@ jobs:
     strategy:
       matrix:
         os: [windows-latest, macOS-latest]
-        node-version: [12, 14, 15, 16]
+        node-version: [12, 14, 16, 17]
     steps:
     - uses: actions/checkout@v2
@@ -73,7 +73,7 @@ jobs:
     strategy:
       matrix:
-        node-version: [12, 14, 15, 16]
+        node-version: [12, 14, 16, 17]
     steps:
     - uses: actions/checkout@v2
@@ -152,8 +152,8 @@ jobs:
       env:
         GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
-  build-linux-alpine-node-15:
-    name: Node.js 15 on Alpine Linux
+  build-linux-alpine-node-16:
+    name: Node.js 16 on Alpine Linux
     needs: create-release
     runs-on: ubuntu-latest
     continue-on-error: true
@@ -175,12 +175,12 @@ jobs:
           Linux-Alpine-node-
           Linux-Alpine-
     - name: Install, test, and create artifact
-      uses: ./.github/actions/linux-alpine-node-15/
+      uses: ./.github/actions/linux-alpine-node-16/
       env:
         GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
-  build-linux-alpine-node-16:
-    name: Node.js 16 on Alpine Linux
+  build-linux-alpine-node-17:
+    name: Node.js 17 on Alpine Linux
     needs: create-release
     runs-on: ubuntu-latest
     continue-on-error: true
@@ -202,6 +202,6 @@ jobs:
           Linux-Alpine-node-
           Linux-Alpine-
     - name: Install, test, and create artifact
-      uses: ./.github/actions/linux-alpine-node-16/
+      uses: ./.github/actions/linux-alpine-node-17/
       env:
         GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}

package/.github/workflows/tests.yml CHANGED Viewed

@@ -14,7 +14,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest, macOS-latest]
-        node-version: [12, 14, 15, 16]
+        node-version: [12, 14, 16, 17]
     steps:
     - uses: actions/checkout@v2

package/README.md CHANGED Viewed

@@ -343,6 +343,7 @@ console.log('re2_res : ' + re2_res); // prints: re2_res : abc,a,b,c
 ## Release history
+- 1.17.0 *Updated GYP, added support for Node 17, updated deps.*
 - 1.16.0 *Updated the compiler (thx, [Sergei Dyshel](https://github.com/sergei-dyshel)), updated GYP, removed support for Node 10, added support for Node 16, updated TS bindings (thx, [BannerBomb](https://github.com/BannerBomb)).*
 - 1.15.9 *Updated deps.*
 - 1.15.8 *Updated deps.*

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "re2",
-  "version": "1.16.0",
+  "version": "1.17.0",
   "description": "Bindings for RE2: fast, safe alternative to backtracking regular expression engines.",
   "homepage": "https://github.com/uhop/node-re2",
   "bugs": "https://github.com/uhop/node-re2/issues",
@@ -11,8 +11,8 @@
   },
   "dependencies": {
     "install-artifact-from-github": "^1.2.0",
-    "nan": "^2.14.2",
-    "node-gyp": "^8.0.0"
+    "nan": "^2.15.0",
+    "node-gyp": "^8.4.1"
   },
   "devDependencies": {
     "heya-unit": "^0.3.0"

package/tests/test_general.js CHANGED Viewed

@@ -209,6 +209,12 @@ unit.add(module, [
 		eval(t.TEST("s3.length === 1"));
 		eval(t.TEST("RE2.getUtf8Length(s3) === 3"));
+		var s4 = "🤡";
+		eval(t.TEST("s4.length === 2"));
+		eval(t.TEST("RE2.getUtf8Length(s4) === 4"));
+		eval(t.TEST("RE2.getUtf16Length(Buffer.from(s4, 'utf8')) === s4.length"));
 		var b3 = new Buffer([0xF0]);
 		eval(t.TEST("b3.length === 1"));

package/vendor/README CHANGED Viewed

@@ -31,10 +31,12 @@ The Python wrapper is at https://github.com/google/re2/tree/abseil/python
 and on PyPI (https://pypi.org/project/google-re2/).
 A C wrapper is at https://github.com/marcomaggi/cre2/.
+A D wrapper is at https://github.com/ShigekiKarita/re2d/ and on DUB (code.dlang.org).
 An Erlang wrapper is at https://github.com/dukesoferl/re2/ and on Hex (hex.pm).
 An Inferno wrapper is at https://github.com/powerman/inferno-re2/.
 A Node.js wrapper is at https://github.com/uhop/node-re2/ and on NPM (npmjs.com).
 An OCaml wrapper is at https://github.com/janestreet/re2/ and on OPAM (opam.ocaml.org).
 A Perl wrapper is at https://github.com/dgl/re-engine-RE2/ and on CPAN (cpan.org).
-An R wrapper is at https://github.com/qinwf/re2r/ and on CRAN (cran.r-project.org).
+An R wrapper is at https://github.com/girishji/re2/ and on CRAN (cran.r-project.org).
 A Ruby wrapper is at https://github.com/mudge/re2/ and on RubyGems (rubygems.org).
+A WebAssembly wrapper is at https://github.com/google/re2-wasm/ and on NPM (npmjs.com).

package/vendor/re2/bitstate.cc CHANGED Viewed

@@ -293,9 +293,9 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
   context_ = context;
   if (context_.data() == NULL)
     context_ = text;
-  if (prog_->anchor_start() && context_.begin() != text.begin())
+  if (prog_->anchor_start() && BeginPtr(context_) != BeginPtr(text))
     return false;
-  if (prog_->anchor_end() && context_.end() != text.end())
+  if (prog_->anchor_end() && EndPtr(context_) != EndPtr(text))
     return false;
   anchored_ = anchored || prog_->anchor_start();
   longest_ = longest || prog_->anchor_end();
@@ -377,7 +377,7 @@ bool Prog::SearchBitState(const StringPiece& text,
   bool longest = kind != kFirstMatch;
   if (!b.Search(text, context, anchored, longest, match, nmatch))
     return false;
-  if (kind == kFullMatch && match[0].end() != text.end())
+  if (kind == kFullMatch && EndPtr(match[0]) != EndPtr(text))
     return false;
   return true;
 }

package/vendor/re2/compile.cc CHANGED Viewed

@@ -79,9 +79,11 @@ static const PatchList kNullPatchList = {0, 0};
 struct Frag {
   uint32_t begin;
   PatchList end;
+  bool nullable;
-  Frag() : begin(0) { end.head = 0; }  // needed so Frag can go in vector
-  Frag(uint32_t begin, PatchList end) : begin(begin), end(end) {}
+  Frag() : begin(0), end(kNullPatchList), nullable(false) {}
+  Frag(uint32_t begin, PatchList end, bool nullable)
+      : begin(begin), end(end), nullable(nullable) {}
 };
 // Input encodings.
@@ -264,7 +266,7 @@ int Compiler::AllocInst(int n) {
 // Returns an unmatchable fragment.
 Frag Compiler::NoMatch() {
-  return Frag(0, kNullPatchList);
+  return Frag();
 }
 // Is a an unmatchable fragment?
@@ -290,11 +292,11 @@ Frag Compiler::Cat(Frag a, Frag b) {
   // To run backward over string, reverse all concatenations.
   if (reversed_) {
     PatchList::Patch(inst_.data(), b.end, a.begin);
-    return Frag(b.begin, a.end);
+    return Frag(b.begin, a.end, b.nullable && a.nullable);
   }
   PatchList::Patch(inst_.data(), a.end, b.begin);
-  return Frag(a.begin, b.end);
+  return Frag(a.begin, b.end, a.nullable && b.nullable);
 }
 // Given fragments for a and b, returns fragment for a|b.
@@ -310,7 +312,8 @@ Frag Compiler::Alt(Frag a, Frag b) {
     return NoMatch();
   inst_[id].InitAlt(a.begin, b.begin);
-  return Frag(id, PatchList::Append(inst_.data(), a.end, b.end));
+  return Frag(id, PatchList::Append(inst_.data(), a.end, b.end),
+              a.nullable || b.nullable);
 }
 // When capturing submatches in like-Perl mode, a kOpAlt Inst
@@ -320,27 +323,44 @@ Frag Compiler::Alt(Frag a, Frag b) {
 // then the operator is greedy.  If out1_ is the repetition
 // (and out_ moves forward), then the operator is non-greedy.
-// Given a fragment a, returns a fragment for a* or a*? (if nongreedy)
-Frag Compiler::Star(Frag a, bool nongreedy) {
+// Given a fragment for a, returns a fragment for a+ or a+? (if nongreedy)
+Frag Compiler::Plus(Frag a, bool nongreedy) {
   int id = AllocInst(1);
   if (id < 0)
     return NoMatch();
-  inst_[id].InitAlt(0, 0);
-  PatchList::Patch(inst_.data(), a.end, id);
+  PatchList pl;
   if (nongreedy) {
-    inst_[id].out1_ = a.begin;
-    return Frag(id, PatchList::Mk(id << 1));
+    inst_[id].InitAlt(0, a.begin);
+    pl = PatchList::Mk(id << 1);
   } else {
-    inst_[id].set_out(a.begin);
-    return Frag(id, PatchList::Mk((id << 1) | 1));
+    inst_[id].InitAlt(a.begin, 0);
+    pl = PatchList::Mk((id << 1) | 1);
   }
+  PatchList::Patch(inst_.data(), a.end, id);
+  return Frag(a.begin, pl, a.nullable);
 }
-// Given a fragment for a, returns a fragment for a+ or a+? (if nongreedy)
-Frag Compiler::Plus(Frag a, bool nongreedy) {
-  // a+ is just a* with a different entry point.
-  Frag f = Star(a, nongreedy);
-  return Frag(a.begin, f.end);
+// Given a fragment for a, returns a fragment for a* or a*? (if nongreedy)
+Frag Compiler::Star(Frag a, bool nongreedy) {
+  // When the subexpression is nullable, one Alt isn't enough to guarantee
+  // correct priority ordering within the transitive closure. The simplest
+  // solution is to handle it as (a+)? instead, which adds the second Alt.
+  if (a.nullable)
+    return Quest(Plus(a, nongreedy), nongreedy);
+  int id = AllocInst(1);
+  if (id < 0)
+    return NoMatch();
+  PatchList pl;
+  if (nongreedy) {
+    inst_[id].InitAlt(0, a.begin);
+    pl = PatchList::Mk(id << 1);
+  } else {
+    inst_[id].InitAlt(a.begin, 0);
+    pl = PatchList::Mk((id << 1) | 1);
+  }
+  PatchList::Patch(inst_.data(), a.end, id);
+  return Frag(id, pl, true);
 }
 // Given a fragment for a, returns a fragment for a? or a?? (if nongreedy)
@@ -358,7 +378,7 @@ Frag Compiler::Quest(Frag a, bool nongreedy) {
     inst_[id].InitAlt(a.begin, 0);
     pl = PatchList::Mk((id << 1) | 1);
   }
-  return Frag(id, PatchList::Append(inst_.data(), pl, a.end));
+  return Frag(id, PatchList::Append(inst_.data(), pl, a.end), true);
 }
 // Returns a fragment for the byte range lo-hi.
@@ -367,7 +387,7 @@ Frag Compiler::ByteRange(int lo, int hi, bool foldcase) {
   if (id < 0)
     return NoMatch();
   inst_[id].InitByteRange(lo, hi, foldcase, 0);
-  return Frag(id, PatchList::Mk(id << 1));
+  return Frag(id, PatchList::Mk(id << 1), false);
 }
 // Returns a no-op fragment.  Sometimes unavoidable.
@@ -376,7 +396,7 @@ Frag Compiler::Nop() {
   if (id < 0)
     return NoMatch();
   inst_[id].InitNop(0);
-  return Frag(id, PatchList::Mk(id << 1));
+  return Frag(id, PatchList::Mk(id << 1), true);
 }
 // Returns a fragment that signals a match.
@@ -385,7 +405,7 @@ Frag Compiler::Match(int32_t match_id) {
   if (id < 0)
     return NoMatch();
   inst_[id].InitMatch(match_id);
-  return Frag(id, kNullPatchList);
+  return Frag(id, kNullPatchList, false);
 }
 // Returns a fragment matching a particular empty-width op (like ^ or $)
@@ -394,7 +414,7 @@ Frag Compiler::EmptyWidth(EmptyOp empty) {
   if (id < 0)
     return NoMatch();
   inst_[id].InitEmptyWidth(empty, 0);
-  return Frag(id, PatchList::Mk(id << 1));
+  return Frag(id, PatchList::Mk(id << 1), true);
 }
 // Given a fragment a, returns a fragment with capturing parens around a.
@@ -408,7 +428,7 @@ Frag Compiler::Capture(Frag a, int n) {
   inst_[id+1].InitCapture(2*n+1, 0);
   PatchList::Patch(inst_.data(), a.end, id+1);
-  return Frag(id, PatchList::Mk((id+1) << 1));
+  return Frag(id, PatchList::Mk((id+1) << 1), a.nullable);
 }
 // A Rune is a name for a Unicode code point.
@@ -567,7 +587,7 @@ bool Compiler::ByteRangeEqual(int id1, int id2) {
 Frag Compiler::FindByteRange(int root, int id) {
   if (inst_[root].opcode() == kInstByteRange) {
     if (ByteRangeEqual(root, id))
-      return Frag(root, kNullPatchList);
+      return Frag(root, kNullPatchList, false);
     else
       return NoMatch();
   }
@@ -575,7 +595,7 @@ Frag Compiler::FindByteRange(int root, int id) {
   while (inst_[root].opcode() == kInstAlt) {
     int out1 = inst_[root].out1();
     if (ByteRangeEqual(out1, id))
-      return Frag(root, PatchList::Mk((root << 1) | 1));
+      return Frag(root, PatchList::Mk((root << 1) | 1), false);
     // CharClass is a sorted list of ranges, so if out1 of the root Alt wasn't
     // what we're looking for, then we can stop immediately. Unfortunately, we
@@ -587,7 +607,7 @@ Frag Compiler::FindByteRange(int root, int id) {
     if (inst_[out].opcode() == kInstAlt)
       root = out;
     else if (ByteRangeEqual(out, id))
-      return Frag(root, PatchList::Mk(root << 1));
+      return Frag(root, PatchList::Mk(root << 1), false);
     else
       return NoMatch();
   }
@@ -1156,12 +1176,8 @@ Prog* Compiler::Finish(Regexp* re) {
   if (!prog_->reversed()) {
     std::string prefix;
     bool prefix_foldcase;
-    if (re->RequiredPrefixForAccel(&prefix, &prefix_foldcase) &&
-        !prefix_foldcase) {
-      prog_->prefix_size_ = prefix.size();
-      prog_->prefix_front_ = prefix.front();
-      prog_->prefix_back_ = prefix.back();
-    }
+    if (re->RequiredPrefixForAccel(&prefix, &prefix_foldcase))
+      prog_->ConfigurePrefixAccel(prefix, prefix_foldcase);
   }
   // Record remaining memory for DFA.

package/vendor/re2/dfa.cc CHANGED Viewed

@@ -56,6 +56,10 @@ namespace re2 {
 // Controls whether the DFA should bail out early if the NFA would be faster.
 static bool dfa_should_bail_when_slow = true;
+void Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(bool b) {
+  dfa_should_bail_when_slow = b;
+}
 // Changing this to true compiles in prints that trace execution of the DFA.
 // Generates a lot of output -- only useful for debugging.
 static const bool ExtraDebug = false;
@@ -167,6 +171,9 @@ class DFA {
   typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;
  private:
+  // Make it easier to swap in a scalable reader-writer mutex.
+  using CacheMutex = Mutex;
   enum {
     // Indices into start_ for unanchored searches.
     // Add kStartAnchored for anchored searches.
@@ -331,7 +338,7 @@ class DFA {
   // while holding cache_mutex_ for writing, to avoid interrupting other
   // readers.  Any State* pointers are only valid while cache_mutex_
   // is held.
-  Mutex cache_mutex_;
+  CacheMutex cache_mutex_;
   int64_t mem_budget_;     // Total memory budget for all States.
   int64_t state_budget_;   // Amount of memory remaining for new States.
   StateSet state_cache_;   // All States computed so far.
@@ -1106,7 +1113,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) {
 class DFA::RWLocker {
  public:
-  explicit RWLocker(Mutex* mu);
+  explicit RWLocker(CacheMutex* mu);
   ~RWLocker();
   // If the lock is only held for reading right now,
@@ -1116,14 +1123,14 @@ class DFA::RWLocker {
   void LockForWriting();
  private:
-  Mutex* mu_;
+  CacheMutex* mu_;
   bool writing_;
   RWLocker(const RWLocker&) = delete;
   RWLocker& operator=(const RWLocker&) = delete;
 };
-DFA::RWLocker::RWLocker(Mutex* mu) : mu_(mu), writing_(false) {
+DFA::RWLocker::RWLocker(CacheMutex* mu) : mu_(mu), writing_(false) {
   mu_->ReaderLock();
 }
@@ -1481,15 +1488,15 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
   int lastbyte;
   if (run_forward) {
-    if (params->text.end() == params->context.end())
+    if (EndPtr(params->text) == EndPtr(params->context))
       lastbyte = kByteEndText;
     else
-      lastbyte = params->text.end()[0] & 0xFF;
+      lastbyte = EndPtr(params->text)[0] & 0xFF;
   } else {
-    if (params->text.begin() == params->context.begin())
+    if (BeginPtr(params->text) == BeginPtr(params->context))
       lastbyte = kByteEndText;
     else
-      lastbyte = params->text.begin()[-1] & 0xFF;
+      lastbyte = BeginPtr(params->text)[-1] & 0xFF;
   }
   State* ns = s->next_[ByteMap(lastbyte)].load(std::memory_order_acquire);
@@ -1620,7 +1627,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
   const StringPiece& context = params->context;
   // Sanity check: make sure that text lies within context.
-  if (text.begin() < context.begin() || text.end() > context.end()) {
+  if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) {
     LOG(DFATAL) << "context does not contain text";
     params->start = DeadState;
     return true;
@@ -1630,13 +1637,13 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
   int start;
   uint32_t flags;
   if (params->run_forward) {
-    if (text.begin() == context.begin()) {
+    if (BeginPtr(text) == BeginPtr(context)) {
       start = kStartBeginText;
       flags = kEmptyBeginText|kEmptyBeginLine;
-    } else if (text.begin()[-1] == '\n') {
+    } else if (BeginPtr(text)[-1] == '\n') {
       start = kStartBeginLine;
       flags = kEmptyBeginLine;
-    } else if (Prog::IsWordChar(text.begin()[-1] & 0xFF)) {
+    } else if (Prog::IsWordChar(BeginPtr(text)[-1] & 0xFF)) {
       start = kStartAfterWordChar;
       flags = kFlagLastWord;
     } else {
@@ -1644,13 +1651,13 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
       flags = 0;
     }
   } else {
-    if (text.end() == context.end()) {
+    if (EndPtr(text) == EndPtr(context)) {
       start = kStartBeginText;
       flags = kEmptyBeginText|kEmptyBeginLine;
-    } else if (text.end()[0] == '\n') {
+    } else if (EndPtr(text)[0] == '\n') {
       start = kStartBeginLine;
       flags = kEmptyBeginLine;
-    } else if (Prog::IsWordChar(text.end()[0] & 0xFF)) {
+    } else if (Prog::IsWordChar(EndPtr(text)[0] & 0xFF)) {
       start = kStartAfterWordChar;
       flags = kFlagLastWord;
     } else {
@@ -1830,9 +1837,9 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
     using std::swap;
     swap(caret, dollar);
   }
-  if (caret && context.begin() != text.begin())
+  if (caret && BeginPtr(context) != BeginPtr(text))
     return false;
-  if (dollar && context.end() != text.end())
+  if (dollar && EndPtr(context) != EndPtr(text))
     return false;
   // Handle full match by running an anchored longest match
@@ -1963,10 +1970,6 @@ int Prog::BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb) {
   return GetDFA(kind)->BuildAllStates(cb);
 }
-void Prog::TEST_dfa_should_bail_when_slow(bool b) {
-  dfa_should_bail_when_slow = b;
-}
 // Computes min and max for matching string.
 // Won't return strings bigger than maxlen.
 bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {

package/vendor/re2/fuzzing/re2_fuzzer.cc CHANGED Viewed

@@ -5,19 +5,96 @@
 #include <fuzzer/FuzzedDataProvider.h>
 #include <stddef.h>
 #include <stdint.h>
-#include <memory>
-#include <queue>
+#include <algorithm>
 #include <string>
 #include <vector>
-#include "re2/prefilter.h"
 #include "re2/re2.h"
+#include "re2/regexp.h"
+#include "re2/walker-inl.h"
 using re2::StringPiece;
 // NOT static, NOT signed.
 uint8_t dummy = 0;
+// Walks kRegexpConcat and kRegexpAlternate subexpressions
+// to determine their maximum length.
+class SubexpressionWalker : public re2::Regexp::Walker<int> {
+ public:
+  SubexpressionWalker() = default;
+  ~SubexpressionWalker() override = default;
+  int PostVisit(re2::Regexp* re, int parent_arg, int pre_arg,
+                int* child_args, int nchild_args) override {
+    switch (re->op()) {
+      case re2::kRegexpConcat:
+      case re2::kRegexpAlternate: {
+        int max = nchild_args;
+        for (int i = 0; i < nchild_args; i++)
+          max = std::max(max, child_args[i]);
+        return max;
+      }
+      default:
+        break;
+    }
+    return -1;
+  }
+  // Should never be called: we use Walk(), not WalkExponential().
+  int ShortVisit(re2::Regexp* re, int parent_arg) override {
+    return parent_arg;
+  }
+ private:
+  SubexpressionWalker(const SubexpressionWalker&) = delete;
+  SubexpressionWalker& operator=(const SubexpressionWalker&) = delete;
+};
+// Walks substrings (i.e. kRegexpLiteralString subexpressions)
+// to determine their maximum length... in runes, but avoiding
+// overheads due to UTF-8 encoding is worthwhile when fuzzing.
+class SubstringWalker : public re2::Regexp::Walker<int> {
+ public:
+  SubstringWalker() = default;
+  ~SubstringWalker() override = default;
+  int PostVisit(re2::Regexp* re, int parent_arg, int pre_arg,
+                int* child_args, int nchild_args) override {
+    switch (re->op()) {
+      case re2::kRegexpConcat:
+      case re2::kRegexpAlternate:
+      case re2::kRegexpStar:
+      case re2::kRegexpPlus:
+      case re2::kRegexpQuest:
+      case re2::kRegexpRepeat:
+      case re2::kRegexpCapture: {
+        int max = -1;
+        for (int i = 0; i < nchild_args; i++)
+          max = std::max(max, child_args[i]);
+        return max;
+      }
+      case re2::kRegexpLiteralString:
+        return re->nrunes();
+      default:
+        break;
+    }
+    return -1;
+  }
+  // Should never be called: we use Walk(), not WalkExponential().
+  int ShortVisit(re2::Regexp* re, int parent_arg) override {
+    return parent_arg;
+  }
+ private:
+  SubstringWalker(const SubstringWalker&) = delete;
+  SubstringWalker& operator=(const SubstringWalker&) = delete;
+};
 void TestOneInput(StringPiece pattern, const RE2::Options& options,
                   StringPiece text) {
   // Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
@@ -26,11 +103,15 @@ void TestOneInput(StringPiece pattern, const RE2::Options& options,
   // generating such patterns that fall within the other limits, but result
   // in timeouts nonetheless. The marginal cost is high - even more so when
   // counted repetition is involved - whereas the marginal benefit is zero.
+  // Crudely limit the use of 'k', 'K', 's' and 'S' too because they become
+  // three-element character classes when case-insensitive and using UTF-8.
   // TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
   int char_class = 0;
   int backslash_p = 0;  // very expensive, so handle specially
   for (size_t i = 0; i < pattern.size(); i++) {
-    if (pattern[i] == '.')
+    if (pattern[i] == '.' ||
+        pattern[i] == 'k' || pattern[i] == 'K' ||
+        pattern[i] == 's' || pattern[i] == 'S')
       char_class++;
     if (pattern[i] != '\\')
       continue;
@@ -50,31 +131,26 @@ void TestOneInput(StringPiece pattern, const RE2::Options& options,
   if (backslash_p > 1)
     return;
+  // The default is 1000. Even 100 turned out to be too generous
+  // for fuzzing, empirically speaking, so let's try 10 instead.
+  re2::Regexp::FUZZING_ONLY_set_maximum_repeat_count(10);
   RE2 re(pattern, options);
   if (!re.ok())
     return;
+  // Don't waste time fuzzing programs with large subexpressions.
+  // They can cause bug reports due to fuzzer timeouts. And they
+  // aren't interesting for fuzzing purposes.
+  if (SubexpressionWalker().Walk(re.Regexp(), -1) > 9)
+    return;
   // Don't waste time fuzzing programs with large substrings.
   // They can cause bug reports due to fuzzer timeouts when they
   // are repetitions (e.g. hundreds of NUL bytes) and matching is
   // unanchored. And they aren't interesting for fuzzing purposes.
-  std::unique_ptr<re2::Prefilter> prefilter(re2::Prefilter::FromRE2(&re));
-  if (prefilter == nullptr)
+  if (SubstringWalker().Walk(re.Regexp(), -1) > 9)
     return;
-  std::queue<re2::Prefilter*> nodes;
-  nodes.push(prefilter.get());
-  while (!nodes.empty()) {
-    re2::Prefilter* node = nodes.front();
-    nodes.pop();
-    if (node->op() == re2::Prefilter::ATOM) {
-      if (node->atom().size() > 9)
-        return;
-    } else if (node->op() == re2::Prefilter::AND ||
-               node->op() == re2::Prefilter::OR) {
-      for (re2::Prefilter* sub : *node->subs())
-        nodes.push(sub);
-    }
-  }
   // Don't waste time fuzzing high-size programs.
   // They can cause bug reports due to fuzzer timeouts.

package/vendor/re2/make_perl_groups.pl CHANGED Viewed

@@ -76,7 +76,7 @@ sub PrintClass($$@) {
   } else {
     $negname =~ y/a-z/A-Z/;
   }
-  return "{ \"$escname\", +1, code$cnum, $n }", "{ \"$negname\", -1, code$cnum, $n }";
+  return "{ \"$escname\", +1, code$cnum, $n, 0, 0 }", "{ \"$negname\", -1, code$cnum, $n, 0, 0 }";
 }
 my $cnum = 0;