npm - re2 - Versions diffs - 1.17.8 → 1.18.1 - Mend

re2 1.17.8 → 1.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)

package/.github/actions/{linux-alpine-node-14 → linux-alpine-node-20}/Dockerfile +1 -1
package/.github/actions/linux-alpine-node-20/action.yml +7 -0
package/.github/actions/linux-node-12/Dockerfile +1 -1
package/.github/actions/{linux-node-19 → linux-node-20}/Dockerfile +1 -1
package/.github/actions/{linux-node-19 → linux-node-20}/action.yml +3 -3
package/.github/workflows/build.yml +15 -35
package/.github/workflows/tests.yml +2 -5
package/README.md +2 -0
package/binding.gyp +1 -0
package/package.json +7 -4
package/re2.d.ts +19 -15
package/ts-tests/test-types.ts +28 -0
package/tsconfig.json +20 -0
package/vendor/re2/bitmap256.cc +44 -0
package/vendor/re2/bitmap256.h +0 -31
package/vendor/re2/compile.cc +3 -3
package/vendor/re2/dfa.cc +1 -1
package/vendor/re2/fuzzing/re2_fuzzer.cc +38 -2
package/vendor/re2/parse.cc +1 -3
package/vendor/re2/prefilter.cc +25 -26
package/vendor/re2/prefilter.h +23 -1
package/vendor/re2/prog.cc +1 -1
package/vendor/re2/re2.cc +77 -47
package/vendor/re2/re2.h +49 -35
package/vendor/re2/regexp.cc +24 -14
package/vendor/re2/set.cc +2 -2
package/vendor/re2/simplify.cc +2 -2
package/vendor/re2/testing/filtered_re2_test.cc +2 -1
package/vendor/re2/unicode.py +2 -2
package/vendor/re2/unicode_groups.cc +150 -75
package/vendor/util/fuzz.cc +4 -4
package/vendor/util/mutex.h +18 -2
package/vendor/util/pcre.h +1 -1
package/vendor/util/rune.cc +4 -4
package/.github/actions/linux-alpine-node-14/action.yml +0 -7
package/.github/actions/linux-alpine-node-14/entrypoint.sh +0 -8
package/.github/actions/linux-alpine-node-19/Dockerfile +0 -6
package/.github/actions/linux-alpine-node-19/action.yml +0 -7
package/.github/actions/{linux-alpine-node-19 → linux-alpine-node-20}/entrypoint.sh +0 -0
package/.github/actions/{linux-node-19 → linux-node-20}/entrypoint.sh +0 -0

package/.github/actions/{linux-alpine-node-14 → linux-alpine-node-20}/Dockerfile RENAMED Viewed

@@ -1,4 +1,4 @@
-FROM node:14-alpine
+FROM node:20-alpine
 RUN apk add --no-cache python3 make gcc g++

package/.github/actions/linux-alpine-node-20/action.yml ADDED Viewed

@@ -0,0 +1,7 @@
+name: 'Create a binary artifact for Node 20 on Alpine Linux'
+description: 'Create a binary artifact for Node 20 on Alpine Linux using musl'
+runs:
+  using: 'docker'
+  image: 'Dockerfile'
+  args:
+    - ${{inputs.node-version}}

package/.github/actions/linux-node-12/Dockerfile CHANGED Viewed

@@ -1,7 +1,7 @@
 FROM centos:centos7
 RUN yum install -y centos-release-scl && \
-    INSTALL_PKGS="devtoolset-8 python3 make" && \
+    INSTALL_PKGS="devtoolset-8 python3 make git" && \
     yum install -y --setopt=tsflags=nodocs $INSTALL_PKGS && \
     rpm -V $INSTALL_PKGS && \
     yum -y clean all --enablerepo='*'

package/.github/actions/{linux-node-19 → linux-node-20}/Dockerfile RENAMED Viewed

@@ -1,4 +1,4 @@
-FROM node:19-buster
+FROM node:20-buster
 RUN apt install python3 make gcc g++

package/.github/actions/{linux-node-19 → linux-node-20}/action.yml RENAMED Viewed

@@ -1,10 +1,10 @@
-name: 'Create a binary artifact for Node == 19 on Linux'
-description: 'Create a binary artifact for Node == 19 on Linux using node:19-buster'
+name: 'Create a binary artifact for Node 20 on Linux'
+description: 'Create a binary artifact for Node 20 on Linux using node:20-buster'
 inputs:
   node-version:
     description: 'Node.js version'
     required: false
-    default: '19'
+    default: '20'
 runs:
   using: 'docker'
   image: 'Dockerfile'

package/.github/workflows/build.yml CHANGED Viewed

@@ -13,15 +13,13 @@ jobs:
     runs-on: ubuntu-latest
     steps:
-    - name: Create release
-      uses: actions/create-release@v1
-      env:
-        GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
-      with:
-        tag_name: ${{github.ref}}
-        release_name: Release ${{github.ref}}
-        draft: false
-        prerelease: false
+    - uses: actions/checkout@v3
+    - env:
+        GH_TOKEN: ${{github.token}}
+      run: |
+        REF=${{github.ref}}
+        TAG=${REF#"refs/tags/"}
+        gh release create -t "Release ${TAG}" -n "" "${{github.ref}}"
   build:
     name: Node.js ${{matrix.node-version}} on ${{matrix.os}}
@@ -31,10 +29,7 @@ jobs:
     strategy:
       matrix:
         os: [macOS-latest, windows-latest]
-        node-version: [14, 16, 18, 19]
-        exclude:
-          - os: windows-latest
-            node-version: 14
+        node-version: [16, 18, 20]
     steps:
     - uses: actions/checkout@v3
@@ -64,7 +59,7 @@ jobs:
     strategy:
       matrix:
-        node-version: [14, 16]
+        node-version: [16]
     steps:
     - uses: actions/checkout@v3
@@ -92,23 +87,8 @@ jobs:
       env:
         GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
-  build-linux-node-19:
-    name: Node.js 19 on Debian Buster
-    needs: create-release
-    runs-on: ubuntu-latest
-    continue-on-error: true
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        submodules: true
-    - name: Install, test, and create artifact
-      uses: ./.github/actions/linux-node-19/
-      env:
-        GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
-  build-linux-alpine-node-14:
-    name: Node.js 14 on Alpine Linux
+  build-linux-node-20:
+    name: Node.js 20 on Debian Buster
     needs: create-release
     runs-on: ubuntu-latest
     continue-on-error: true
@@ -118,7 +98,7 @@ jobs:
       with:
         submodules: true
     - name: Install, test, and create artifact
-      uses: ./.github/actions/linux-alpine-node-14/
+      uses: ./.github/actions/linux-node-20/
       env:
         GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
@@ -152,8 +132,8 @@ jobs:
       env:
         GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
-  build-linux-alpine-node-19:
-    name: Node.js 19 on Alpine Linux
+  build-linux-alpine-node-20:
+    name: Node.js 20 on Alpine Linux
     needs: create-release
     runs-on: ubuntu-latest
     continue-on-error: true
@@ -163,6 +143,6 @@ jobs:
       with:
         submodules: true
     - name: Install, test, and create artifact
-      uses: ./.github/actions/linux-alpine-node-19/
+      uses: ./.github/actions/linux-alpine-node-20/
       env:
         GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}

package/.github/workflows/tests.yml CHANGED Viewed

@@ -14,10 +14,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macOS-latest, windows-latest]
-        node-version: [14, 16, 18, 19]
-        exclude:
-          - os: windows-latest
-            node-version: 14
+        node-version: [16, 18, 20]
     steps:
     - uses: actions/checkout@v3
@@ -33,4 +30,4 @@ jobs:
       run: |
         npm i
         npm run build --if-present
-        npm test
+        npm test && npm run ts-test

package/README.md CHANGED Viewed

@@ -352,6 +352,8 @@ console.log('re2_res : ' + re2_res); // prints: re2_res : abc,a,b,c
 ## Release history
+- 1.18.1 *Support for Node 16, 18, 20 + Darwin arm64 precompiled binaries.*
+- 1.18.0 *Modified TS bindings, added a type test (thx, [Kenichi Kamiya](https://github.com/kachick) and [Jamie Magee](https://github.com/JamieMagee)).*
 - 1.17.8 *Updated deps, added Node 19 as a pre-compilation target.*
 - 1.17.7 *Added support for a cross-platform fetching of a pre-compiled version by updating [install-artifact-from-github](https://github.com/uhop/install-artifact-from-github).*
 - 1.17.6 *Implemented `dotAll`. Thx [Michael Kriese](https://github.com/viceice).*

package/binding.gyp CHANGED Viewed

@@ -14,6 +14,7 @@
         "lib/to_string.cc",
         "lib/accessors.cc",
         "lib/util.cc",
+        "vendor/re2/bitmap256.cc",
         "vendor/re2/bitstate.cc",
         "vendor/re2/compile.cc",
         "vendor/re2/dfa.cc",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "re2",
-  "version": "1.17.8",
+  "version": "1.18.1",
   "description": "Bindings for RE2: fast, safe alternative to backtracking regular expression engines.",
   "homepage": "https://github.com/uhop/node-re2",
   "bugs": "https://github.com/uhop/node-re2/issues",
@@ -11,15 +11,18 @@
     "test": "tests"
   },
   "dependencies": {
-    "install-artifact-from-github": "^1.3.1",
+    "install-artifact-from-github": "^1.3.3",
     "nan": "^2.17.0",
-    "node-gyp": "^9.3.0"
+    "node-gyp": "^9.3.1"
   },
   "devDependencies": {
-    "heya-unit": "^0.3.0"
+    "@types/node": "^20.2.3",
+    "heya-unit": "^0.3.0",
+    "typescript": "^5.0.4"
   },
   "scripts": {
     "test": "node tests/tests.js",
+    "ts-test": "tsc",
     "save-to-github": "save-to-github-cache --artifact build/Release/re2.node",
     "install": "install-from-cache --artifact build/Release/re2.node --host-var RE2_DOWNLOAD_MIRROR --skip-path-var RE2_DOWNLOAD_SKIP_PATH --skip-ver-var RE2_DOWNLOAD_SKIP_VER || npm run rebuild",
     "verify-build": "node scripts/verify-build.js",

package/re2.d.ts CHANGED Viewed

@@ -1,27 +1,31 @@
 declare module 're2' {
-  interface RE2MatchArray<K> extends Array<K> {
-    index?: number;
-    input?: K;
+  interface RE2BufferExecArray {
+    index: number;
+    input: Buffer;
+    0: Buffer;
     groups?: {
-      [key: string]: string
+      [key: string]: Buffer
     }
   }
-  interface RE2ExecArray<K> extends Array<K> {
-    index: number;
-    input: K;
+  interface RE2BufferMatchArray {
+    index?: number;
+    input?: Buffer;
+    0: Buffer;
     groups?: {
-      [key: string]: string
+      [key: string]: Buffer
     }
   }
   interface RE2 extends RegExp {
-    exec<K extends String | Buffer>(str: K): RE2ExecArray<K> | null;
+    exec(str: string): RegExpExecArray | null;
+    exec(str: Buffer): RE2BufferExecArray | null;
-    test(str: string | Buffer): boolean;
+    match(str: string): RegExpMatchArray | null;
+    match(str: Buffer): RE2BufferMatchArray | null;
-    match<K extends String | Buffer>(str: K): RE2MatchArray<K> | null;
+    test(str: string | Buffer): boolean;
     replace<K extends String | Buffer>(str: K, replaceValue: string | Buffer): K;
     replace<K extends String | Buffer>(str: K, replacer: (substring: string, ...args: any[]) => string | Buffer): K;
@@ -32,10 +36,10 @@ declare module 're2' {
   }
   interface RE2Constructor extends RegExpConstructor {
-    new(pattern: Buffer | RegExp | string): RE2;
-    new(pattern: Buffer | string, flags?: string): RE2;
-    (pattern: Buffer | RegExp | string): RE2;
-    (pattern: Buffer | string, flags?: string): RE2;
+    new(pattern: Buffer | RegExp | RE2 | string): RE2;
+    new(pattern: Buffer | string, flags?: string | Buffer): RE2;
+    (pattern: Buffer | RegExp | RE2 | string): RE2;
+    (pattern: Buffer | string, flags?: string | Buffer): RE2;
     readonly prototype: RE2;
     unicodeWarningLevel: 'nothing' | 'warnOnce' | 'warn' | 'throw';

package/ts-tests/test-types.ts ADDED Viewed

@@ -0,0 +1,28 @@
+import RE2 from 're2';
+function assertType<T>(_val: T) {}
+function test_execTypes() {
+  const re = new RE2('quick\\s(brown).+?(?<verb>jumps)', 'ig');
+  const result = re.exec('The Quick Brown Fox Jumps Over The Lazy Dog')
+  if (!(result && result.groups)) {
+    throw 'Unexpected Result'
+  }
+  assertType<number>(result.index)
+  assertType<string>(result.input)
+  assertType<string | undefined>(result.groups['verb'])
+}
+function test_matchTypes() {
+  const re = new RE2('quick\\s(brown).+?(?<verb>jumps)', 'ig');
+  const result = re.match('The Quick Brown Fox Jumps Over The Lazy Dog')
+  if (!(result && result.index && result.input && result.groups)) {
+    throw 'Unexpected Result'
+  }
+  assertType<number>(result.index)
+  assertType<string>(result.input)
+  assertType<string | undefined>(result.groups['verb'])
+}
+test_execTypes()
+test_matchTypes()

package/tsconfig.json ADDED Viewed

@@ -0,0 +1,20 @@
+{
+  "compilerOptions": {
+    "noEmit": true,
+    "declaration": true,
+    "esModuleInterop": true,
+    "strict": true,
+    "allowUnusedLabels": false,
+    "allowUnreachableCode": false,
+    "exactOptionalPropertyTypes": true,
+    "noFallthroughCasesInSwitch": true,
+    "noImplicitOverride": true,
+    "noImplicitReturns": true,
+    "noPropertyAccessFromIndexSignature": true,
+    "noUncheckedIndexedAccess": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+  },
+  "include": ["**/*.ts"]
+}

package/vendor/re2/bitmap256.cc ADDED Viewed

@@ -0,0 +1,44 @@
+// Copyright 2023 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#include "re2/bitmap256.h"
+#include <stdint.h>
+#include "util/util.h"
+#include "util/logging.h"
+namespace re2 {
+int Bitmap256::FindNextSetBit(int c) const {
+  DCHECK_GE(c, 0);
+  DCHECK_LE(c, 255);
+  // Check the word that contains the bit. Mask out any lower bits.
+  int i = c / 64;
+  uint64_t word = words_[i] & (~uint64_t{0} << (c % 64));
+  if (word != 0)
+    return (i * 64) + FindLSBSet(word);
+  // Check any following words.
+  i++;
+  switch (i) {
+    case 1:
+      if (words_[1] != 0)
+        return (1 * 64) + FindLSBSet(words_[1]);
+      FALLTHROUGH_INTENDED;
+    case 2:
+      if (words_[2] != 0)
+        return (2 * 64) + FindLSBSet(words_[2]);
+      FALLTHROUGH_INTENDED;
+    case 3:
+      if (words_[3] != 0)
+        return (3 * 64) + FindLSBSet(words_[3]);
+      FALLTHROUGH_INTENDED;
+    default:
+      return -1;
+  }
+}
+}  // namespace re2

package/vendor/re2/bitmap256.h CHANGED Viewed

@@ -11,7 +11,6 @@
 #include <stdint.h>
 #include <string.h>
-#include "util/util.h"
 #include "util/logging.h"
 namespace re2 {
@@ -82,36 +81,6 @@ class Bitmap256 {
   uint64_t words_[4];
 };
-int Bitmap256::FindNextSetBit(int c) const {
-  DCHECK_GE(c, 0);
-  DCHECK_LE(c, 255);
-  // Check the word that contains the bit. Mask out any lower bits.
-  int i = c / 64;
-  uint64_t word = words_[i] & (~uint64_t{0} << (c % 64));
-  if (word != 0)
-    return (i * 64) + FindLSBSet(word);
-  // Check any following words.
-  i++;
-  switch (i) {
-    case 1:
-      if (words_[1] != 0)
-        return (1 * 64) + FindLSBSet(words_[1]);
-      FALLTHROUGH_INTENDED;
-    case 2:
-      if (words_[2] != 0)
-        return (2 * 64) + FindLSBSet(words_[2]);
-      FALLTHROUGH_INTENDED;
-    case 3:
-      if (words_[3] != 0)
-        return (3 * 64) + FindLSBSet(words_[3]);
-      FALLTHROUGH_INTENDED;
-    default:
-      return -1;
-  }
-}
 }  // namespace re2
 #endif  // RE2_BITMAP256_H_

package/vendor/re2/compile.cc CHANGED Viewed

@@ -789,8 +789,8 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
 // Should not be called.
 Frag Compiler::Copy(Frag arg) {
   // We're using WalkExponential; there should be no copying.
-  LOG(DFATAL) << "Compiler::Copy called!";
   failed_ = true;
+  LOG(DFATAL) << "Compiler::Copy called!";
   return NoMatch();
 }
@@ -916,8 +916,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
       CharClass* cc = re->cc();
       if (cc->empty()) {
         // This can't happen.
-        LOG(DFATAL) << "No ranges in char class";
         failed_ = true;
+        LOG(DFATAL) << "No ranges in char class";
         return NoMatch();
       }
@@ -974,8 +974,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
     case kRegexpNoWordBoundary:
       return EmptyWidth(kEmptyNonWordBoundary);
   }
-  LOG(DFATAL) << "Missing case in Compiler: " << re->op();
   failed_ = true;
+  LOG(DFATAL) << "Missing case in Compiler: " << re->op();
   return NoMatch();
 }

package/vendor/re2/dfa.cc CHANGED Viewed

@@ -1675,8 +1675,8 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
   if (!AnalyzeSearchHelper(params, info, flags)) {
     ResetCache(params->cache_lock);
     if (!AnalyzeSearchHelper(params, info, flags)) {
-      LOG(DFATAL) << "Failed to analyze start state.";
       params->failed = true;
+      LOG(DFATAL) << "Failed to analyze start state.";
       return false;
     }
   }

package/vendor/re2/fuzzing/re2_fuzzer.cc CHANGED Viewed

@@ -9,8 +9,10 @@
 #include <string>
 #include <vector>
+#include "re2/filtered_re2.h"
 #include "re2/re2.h"
 #include "re2/regexp.h"
+#include "re2/set.h"
 #include "re2/walker-inl.h"
 using re2::StringPiece;
@@ -96,7 +98,7 @@ class SubstringWalker : public re2::Regexp::Walker<int> {
 };
 void TestOneInput(StringPiece pattern, const RE2::Options& options,
-                  StringPiece text) {
+                  RE2::Anchor anchor, StringPiece text) {
   // Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
   // Otherwise, we will waste time on inputs that have long runs of various
   // character classes. The fuzzer has shown itself to be easily capable of
@@ -131,6 +133,9 @@ void TestOneInput(StringPiece pattern, const RE2::Options& options,
   if (backslash_p > 1)
     return;
+  // Iterate just once when fuzzing. Otherwise, we easily get bogged down
+  // and coverage is unlikely to improve despite significant expense.
+  RE2::FUZZING_ONLY_set_maximum_global_replace_count(1);
   // The default is 1000. Even 100 turned out to be too generous
   // for fuzzing, empirically speaking, so let's try 10 instead.
   re2::Regexp::FUZZING_ONLY_set_maximum_repeat_count(10);
@@ -206,6 +211,29 @@ void TestOneInput(StringPiece pattern, const RE2::Options& options,
   dummy += re.NamedCapturingGroups().size();
   dummy += re.CapturingGroupNames().size();
   dummy += RE2::QuoteMeta(pattern).size();
+  RE2::Set set(options, anchor);
+  int index = set.Add(pattern, /*error=*/NULL);  // -1 on error
+  if (index != -1 && set.Compile()) {
+    std::vector<int> matches;
+    set.Match(text, &matches);
+  }
+  re2::FilteredRE2 filter;
+  index = -1;  // not clobbered on error
+  filter.Add(pattern, options, &index);
+  if (index != -1) {
+    std::vector<std::string> atoms;
+    filter.Compile(&atoms);
+    // Pretend that all atoms match, which
+    // triggers the AND-OR tree maximally.
+    std::vector<int> matched_atoms;
+    matched_atoms.reserve(atoms.size());
+    for (size_t i = 0; i < atoms.size(); ++i)
+      matched_atoms.push_back(static_cast<int>(i));
+    std::vector<int> matches;
+    filter.AllMatches(text, matched_atoms, &matches);
+  }
 }
 // Entry point for libFuzzer.
@@ -239,9 +267,17 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
   options.set_word_boundary(fdp.ConsumeBool());
   options.set_one_line(fdp.ConsumeBool());
+  // ConsumeEnum<RE2::Anchor>() would require RE2::Anchor to specify
+  // kMaxValue, so just use PickValueInArray<RE2::Anchor>() instead.
+  RE2::Anchor anchor = fdp.PickValueInArray<RE2::Anchor>({
+      RE2::UNANCHORED,
+      RE2::ANCHOR_START,
+      RE2::ANCHOR_BOTH,
+  });
   std::string pattern = fdp.ConsumeRandomLengthString(999);
   std::string text = fdp.ConsumeRandomLengthString(999);
-  TestOneInput(pattern, options, text);
+  TestOneInput(pattern, options, anchor, text);
   return 0;
 }

package/vendor/re2/parse.cc CHANGED Viewed

@@ -1589,8 +1589,6 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
     //   return true;
   }
-  LOG(DFATAL) << "Not reached in ParseEscape.";
 BadEscape:
   // Unrecognized escape sequence.
   status->set_code(kRegexpBadEscape);
@@ -2059,8 +2057,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
   // Caller is supposed to check this.
   if (!(flags_ & PerlX) || t.size() < 2 || t[0] != '(' || t[1] != '?') {
-    LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
     status_->set_code(kRegexpInternalError);
+    LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
     return false;
   }

package/vendor/re2/prefilter.cc CHANGED Viewed

@@ -7,6 +7,7 @@
 #include <stddef.h>
 #include <stdint.h>
 #include <string>
+#include <utility>
 #include <vector>
 #include "util/util.h"
@@ -21,9 +22,6 @@ namespace re2 {
 static const bool ExtraDebug = false;
-typedef std::set<std::string>::iterator SSIter;
-typedef std::set<std::string>::const_iterator ConstSSIter;
 // Initializes a Prefilter, allocating subs_ as necessary.
 Prefilter::Prefilter(Op op) {
   op_ = op;
@@ -140,7 +138,7 @@ Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {
   return AndOr(OR, a, b);
 }
-static void SimplifyStringSet(std::set<std::string>* ss) {
+void Prefilter::SimplifyStringSet(SSet* ss) {
   // Now make sure that the strings aren't redundant.  For example, if
   // we know "ab" is a required string, then it doesn't help at all to
   // know that "abc" is also a required string, so delete "abc". This
@@ -149,13 +147,19 @@ static void SimplifyStringSet(std::set<std::string>* ss) {
   // candidate for match, so further matching "abc" is redundant.
   // Note that we must ignore "" because find() would find it at the
   // start of everything and thus we would end up erasing everything.
-  for (SSIter i = ss->begin(); i != ss->end(); ++i) {
-    if (i->empty())
-      continue;
+  //
+  // The SSet sorts strings by length, then lexicographically. Note that
+  // smaller strings appear first and all strings must be unique. These
+  // observations let us skip string comparisons when possible.
+  SSIter i = ss->begin();
+  if (i != ss->end() && i->empty()) {
+    ++i;
+  }
+  for (; i != ss->end(); ++i) {
     SSIter j = i;
     ++j;
     while (j != ss->end()) {
-      if (j->find(*i) != std::string::npos) {
+      if (j->size() > i->size() && j->find(*i) != std::string::npos) {
         j = ss->erase(j);
         continue;
       }
@@ -164,7 +168,7 @@ static void SimplifyStringSet(std::set<std::string>* ss) {
   }
 }
-Prefilter* Prefilter::OrStrings(std::set<std::string>* ss) {
+Prefilter* Prefilter::OrStrings(SSet* ss) {
   Prefilter* or_prefilter = new Prefilter(NONE);
   SimplifyStringSet(ss);
   for (SSIter i = ss->begin(); i != ss->end(); ++i)
@@ -226,14 +230,14 @@ class Prefilter::Info {
   // Caller takes ownership of the Prefilter.
   Prefilter* TakeMatch();
-  std::set<std::string>& exact() { return exact_; }
+  SSet& exact() { return exact_; }
   bool is_exact() const { return is_exact_; }
   class Walker;
  private:
-  std::set<std::string> exact_;
+  SSet exact_;
   // When is_exact_ is true, the strings that match
   // are placed in exact_. When it is no longer an exact
@@ -286,18 +290,7 @@ std::string Prefilter::Info::ToString() {
   return "";
 }
-// Add the strings from src to dst.
-static void CopyIn(const std::set<std::string>& src,
-                   std::set<std::string>* dst) {
-  for (ConstSSIter i = src.begin(); i != src.end(); ++i)
-    dst->insert(*i);
-}
-// Add the cross-product of a and b to dst.
-// (For each string i in a and j in b, add i+j.)
-static void CrossProduct(const std::set<std::string>& a,
-                         const std::set<std::string>& b,
-                         std::set<std::string>* dst) {
+void Prefilter::CrossProduct(const SSet& a, const SSet& b, SSet* dst) {
   for (ConstSSIter i = a.begin(); i != a.end(); ++i)
     for (ConstSSIter j = b.begin(); j != b.end(); ++j)
       dst->insert(*i + *j);
@@ -343,8 +336,14 @@ Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) {
   Info *ab = new Info();
   if (a->is_exact_ && b->is_exact_) {
-    CopyIn(a->exact_, &ab->exact_);
-    CopyIn(b->exact_, &ab->exact_);
+    // Avoid string copies by moving the larger exact_ set into
+    // ab directly, then merge in the smaller set.
+    if (a->exact_.size() < b->exact_.size()) {
+      using std::swap;
+      swap(a, b);
+    }
+    ab->exact_ = std::move(a->exact_);
+    ab->exact_.insert(b->exact_.begin(), b->exact_.end());
     ab->is_exact_ = true;
   } else {
     // Either a or b has is_exact_ = false. If the other
@@ -532,8 +531,8 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
   switch (re->op()) {
     default:
     case kRegexpRepeat:
-      LOG(DFATAL) << "Bad regexp op " << re->op();
       info = EmptyString();
+      LOG(DFATAL) << "Bad regexp op " << re->op();
       break;
     case kRegexpNoMatch: