re2 1.17.8 → 1.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/.github/actions/{linux-alpine-node-14 → linux-alpine-node-20}/Dockerfile +1 -1
  2. package/.github/actions/linux-alpine-node-20/action.yml +7 -0
  3. package/.github/actions/linux-node-12/Dockerfile +1 -1
  4. package/.github/actions/{linux-node-19 → linux-node-20}/Dockerfile +1 -1
  5. package/.github/actions/{linux-node-19 → linux-node-20}/action.yml +3 -3
  6. package/.github/workflows/build.yml +15 -35
  7. package/.github/workflows/tests.yml +2 -5
  8. package/README.md +2 -0
  9. package/binding.gyp +1 -0
  10. package/package.json +7 -4
  11. package/re2.d.ts +19 -15
  12. package/ts-tests/test-types.ts +28 -0
  13. package/tsconfig.json +20 -0
  14. package/vendor/re2/bitmap256.cc +44 -0
  15. package/vendor/re2/bitmap256.h +0 -31
  16. package/vendor/re2/compile.cc +3 -3
  17. package/vendor/re2/dfa.cc +1 -1
  18. package/vendor/re2/fuzzing/re2_fuzzer.cc +38 -2
  19. package/vendor/re2/parse.cc +1 -3
  20. package/vendor/re2/prefilter.cc +25 -26
  21. package/vendor/re2/prefilter.h +23 -1
  22. package/vendor/re2/prog.cc +1 -1
  23. package/vendor/re2/re2.cc +77 -47
  24. package/vendor/re2/re2.h +49 -35
  25. package/vendor/re2/regexp.cc +24 -14
  26. package/vendor/re2/set.cc +2 -2
  27. package/vendor/re2/simplify.cc +2 -2
  28. package/vendor/re2/testing/filtered_re2_test.cc +2 -1
  29. package/vendor/re2/unicode.py +2 -2
  30. package/vendor/re2/unicode_groups.cc +150 -75
  31. package/vendor/util/fuzz.cc +4 -4
  32. package/vendor/util/mutex.h +18 -2
  33. package/vendor/util/pcre.h +1 -1
  34. package/vendor/util/rune.cc +4 -4
  35. package/.github/actions/linux-alpine-node-14/action.yml +0 -7
  36. package/.github/actions/linux-alpine-node-14/entrypoint.sh +0 -8
  37. package/.github/actions/linux-alpine-node-19/Dockerfile +0 -6
  38. package/.github/actions/linux-alpine-node-19/action.yml +0 -7
  39. package/.github/actions/{linux-alpine-node-19 → linux-alpine-node-20}/entrypoint.sh +0 -0
  40. package/.github/actions/{linux-node-19 → linux-node-20}/entrypoint.sh +0 -0
@@ -1,4 +1,4 @@
1
- FROM node:14-alpine
1
+ FROM node:20-alpine
2
2
 
3
3
  RUN apk add --no-cache python3 make gcc g++
4
4
 
@@ -0,0 +1,7 @@
1
+ name: 'Create a binary artifact for Node 20 on Alpine Linux'
2
+ description: 'Create a binary artifact for Node 20 on Alpine Linux using musl'
3
+ runs:
4
+ using: 'docker'
5
+ image: 'Dockerfile'
6
+ args:
7
+ - ${{inputs.node-version}}
@@ -1,7 +1,7 @@
1
1
  FROM centos:centos7
2
2
 
3
3
  RUN yum install -y centos-release-scl && \
4
- INSTALL_PKGS="devtoolset-8 python3 make" && \
4
+ INSTALL_PKGS="devtoolset-8 python3 make git" && \
5
5
  yum install -y --setopt=tsflags=nodocs $INSTALL_PKGS && \
6
6
  rpm -V $INSTALL_PKGS && \
7
7
  yum -y clean all --enablerepo='*'
@@ -1,4 +1,4 @@
1
- FROM node:19-buster
1
+ FROM node:20-buster
2
2
 
3
3
  RUN apt install python3 make gcc g++
4
4
 
@@ -1,10 +1,10 @@
1
- name: 'Create a binary artifact for Node == 19 on Linux'
2
- description: 'Create a binary artifact for Node == 19 on Linux using node:19-buster'
1
+ name: 'Create a binary artifact for Node 20 on Linux'
2
+ description: 'Create a binary artifact for Node 20 on Linux using node:20-buster'
3
3
  inputs:
4
4
  node-version:
5
5
  description: 'Node.js version'
6
6
  required: false
7
- default: '19'
7
+ default: '20'
8
8
  runs:
9
9
  using: 'docker'
10
10
  image: 'Dockerfile'
@@ -13,15 +13,13 @@ jobs:
13
13
  runs-on: ubuntu-latest
14
14
 
15
15
  steps:
16
- - name: Create release
17
- uses: actions/create-release@v1
18
- env:
19
- GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
20
- with:
21
- tag_name: ${{github.ref}}
22
- release_name: Release ${{github.ref}}
23
- draft: false
24
- prerelease: false
16
+ - uses: actions/checkout@v3
17
+ - env:
18
+ GH_TOKEN: ${{github.token}}
19
+ run: |
20
+ REF=${{github.ref}}
21
+ TAG=${REF#"refs/tags/"}
22
+ gh release create -t "Release ${TAG}" -n "" "${{github.ref}}"
25
23
 
26
24
  build:
27
25
  name: Node.js ${{matrix.node-version}} on ${{matrix.os}}
@@ -31,10 +29,7 @@ jobs:
31
29
  strategy:
32
30
  matrix:
33
31
  os: [macOS-latest, windows-latest]
34
- node-version: [14, 16, 18, 19]
35
- exclude:
36
- - os: windows-latest
37
- node-version: 14
32
+ node-version: [16, 18, 20]
38
33
 
39
34
  steps:
40
35
  - uses: actions/checkout@v3
@@ -64,7 +59,7 @@ jobs:
64
59
 
65
60
  strategy:
66
61
  matrix:
67
- node-version: [14, 16]
62
+ node-version: [16]
68
63
 
69
64
  steps:
70
65
  - uses: actions/checkout@v3
@@ -92,23 +87,8 @@ jobs:
92
87
  env:
93
88
  GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
94
89
 
95
- build-linux-node-19:
96
- name: Node.js 19 on Debian Buster
97
- needs: create-release
98
- runs-on: ubuntu-latest
99
- continue-on-error: true
100
-
101
- steps:
102
- - uses: actions/checkout@v3
103
- with:
104
- submodules: true
105
- - name: Install, test, and create artifact
106
- uses: ./.github/actions/linux-node-19/
107
- env:
108
- GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
109
-
110
- build-linux-alpine-node-14:
111
- name: Node.js 14 on Alpine Linux
90
+ build-linux-node-20:
91
+ name: Node.js 20 on Debian Buster
112
92
  needs: create-release
113
93
  runs-on: ubuntu-latest
114
94
  continue-on-error: true
@@ -118,7 +98,7 @@ jobs:
118
98
  with:
119
99
  submodules: true
120
100
  - name: Install, test, and create artifact
121
- uses: ./.github/actions/linux-alpine-node-14/
101
+ uses: ./.github/actions/linux-node-20/
122
102
  env:
123
103
  GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
124
104
 
@@ -152,8 +132,8 @@ jobs:
152
132
  env:
153
133
  GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
154
134
 
155
- build-linux-alpine-node-19:
156
- name: Node.js 19 on Alpine Linux
135
+ build-linux-alpine-node-20:
136
+ name: Node.js 20 on Alpine Linux
157
137
  needs: create-release
158
138
  runs-on: ubuntu-latest
159
139
  continue-on-error: true
@@ -163,6 +143,6 @@ jobs:
163
143
  with:
164
144
  submodules: true
165
145
  - name: Install, test, and create artifact
166
- uses: ./.github/actions/linux-alpine-node-19/
146
+ uses: ./.github/actions/linux-alpine-node-20/
167
147
  env:
168
148
  GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
@@ -14,10 +14,7 @@ jobs:
14
14
  strategy:
15
15
  matrix:
16
16
  os: [ubuntu-latest, macOS-latest, windows-latest]
17
- node-version: [14, 16, 18, 19]
18
- exclude:
19
- - os: windows-latest
20
- node-version: 14
17
+ node-version: [16, 18, 20]
21
18
 
22
19
  steps:
23
20
  - uses: actions/checkout@v3
@@ -33,4 +30,4 @@ jobs:
33
30
  run: |
34
31
  npm i
35
32
  npm run build --if-present
36
- npm test
33
+ npm test && npm run ts-test
package/README.md CHANGED
@@ -352,6 +352,8 @@ console.log('re2_res : ' + re2_res); // prints: re2_res : abc,a,b,c
352
352
 
353
353
  ## Release history
354
354
 
355
+ - 1.18.1 *Support for Node 16, 18, 20 + Darwin arm64 precompiled binaries.*
356
+ - 1.18.0 *Modified TS bindings, added a type test (thx, [Kenichi Kamiya](https://github.com/kachick) and [Jamie Magee](https://github.com/JamieMagee)).*
355
357
  - 1.17.8 *Updated deps, added Node 19 as a pre-compilation target.*
356
358
  - 1.17.7 *Added support for a cross-platform fetching of a pre-compiled version by updating [install-artifact-from-github](https://github.com/uhop/install-artifact-from-github).*
357
359
  - 1.17.6 *Implemented `dotAll`. Thx [Michael Kriese](https://github.com/viceice).*
package/binding.gyp CHANGED
@@ -14,6 +14,7 @@
14
14
  "lib/to_string.cc",
15
15
  "lib/accessors.cc",
16
16
  "lib/util.cc",
17
+ "vendor/re2/bitmap256.cc",
17
18
  "vendor/re2/bitstate.cc",
18
19
  "vendor/re2/compile.cc",
19
20
  "vendor/re2/dfa.cc",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "re2",
3
- "version": "1.17.8",
3
+ "version": "1.18.1",
4
4
  "description": "Bindings for RE2: fast, safe alternative to backtracking regular expression engines.",
5
5
  "homepage": "https://github.com/uhop/node-re2",
6
6
  "bugs": "https://github.com/uhop/node-re2/issues",
@@ -11,15 +11,18 @@
11
11
  "test": "tests"
12
12
  },
13
13
  "dependencies": {
14
- "install-artifact-from-github": "^1.3.1",
14
+ "install-artifact-from-github": "^1.3.3",
15
15
  "nan": "^2.17.0",
16
- "node-gyp": "^9.3.0"
16
+ "node-gyp": "^9.3.1"
17
17
  },
18
18
  "devDependencies": {
19
- "heya-unit": "^0.3.0"
19
+ "@types/node": "^20.2.3",
20
+ "heya-unit": "^0.3.0",
21
+ "typescript": "^5.0.4"
20
22
  },
21
23
  "scripts": {
22
24
  "test": "node tests/tests.js",
25
+ "ts-test": "tsc",
23
26
  "save-to-github": "save-to-github-cache --artifact build/Release/re2.node",
24
27
  "install": "install-from-cache --artifact build/Release/re2.node --host-var RE2_DOWNLOAD_MIRROR --skip-path-var RE2_DOWNLOAD_SKIP_PATH --skip-ver-var RE2_DOWNLOAD_SKIP_VER || npm run rebuild",
25
28
  "verify-build": "node scripts/verify-build.js",
package/re2.d.ts CHANGED
@@ -1,27 +1,31 @@
1
1
  declare module 're2' {
2
2
 
3
- interface RE2MatchArray<K> extends Array<K> {
4
- index?: number;
5
- input?: K;
3
+ interface RE2BufferExecArray {
4
+ index: number;
5
+ input: Buffer;
6
+ 0: Buffer;
6
7
  groups?: {
7
- [key: string]: string
8
+ [key: string]: Buffer
8
9
  }
9
10
  }
10
11
 
11
- interface RE2ExecArray<K> extends Array<K> {
12
- index: number;
13
- input: K;
12
+ interface RE2BufferMatchArray {
13
+ index?: number;
14
+ input?: Buffer;
15
+ 0: Buffer;
14
16
  groups?: {
15
- [key: string]: string
17
+ [key: string]: Buffer
16
18
  }
17
19
  }
18
20
 
19
21
  interface RE2 extends RegExp {
20
- exec<K extends String | Buffer>(str: K): RE2ExecArray<K> | null;
22
+ exec(str: string): RegExpExecArray | null;
23
+ exec(str: Buffer): RE2BufferExecArray | null;
21
24
 
22
- test(str: string | Buffer): boolean;
25
+ match(str: string): RegExpMatchArray | null;
26
+ match(str: Buffer): RE2BufferMatchArray | null;
23
27
 
24
- match<K extends String | Buffer>(str: K): RE2MatchArray<K> | null;
28
+ test(str: string | Buffer): boolean;
25
29
 
26
30
  replace<K extends String | Buffer>(str: K, replaceValue: string | Buffer): K;
27
31
  replace<K extends String | Buffer>(str: K, replacer: (substring: string, ...args: any[]) => string | Buffer): K;
@@ -32,10 +36,10 @@ declare module 're2' {
32
36
  }
33
37
 
34
38
  interface RE2Constructor extends RegExpConstructor {
35
- new(pattern: Buffer | RegExp | string): RE2;
36
- new(pattern: Buffer | string, flags?: string): RE2;
37
- (pattern: Buffer | RegExp | string): RE2;
38
- (pattern: Buffer | string, flags?: string): RE2;
39
+ new(pattern: Buffer | RegExp | RE2 | string): RE2;
40
+ new(pattern: Buffer | string, flags?: string | Buffer): RE2;
41
+ (pattern: Buffer | RegExp | RE2 | string): RE2;
42
+ (pattern: Buffer | string, flags?: string | Buffer): RE2;
39
43
  readonly prototype: RE2;
40
44
 
41
45
  unicodeWarningLevel: 'nothing' | 'warnOnce' | 'warn' | 'throw';
@@ -0,0 +1,28 @@
1
+ import RE2 from 're2';
2
+
3
+ function assertType<T>(_val: T) {}
4
+
5
+ function test_execTypes() {
6
+ const re = new RE2('quick\\s(brown).+?(?<verb>jumps)', 'ig');
7
+ const result = re.exec('The Quick Brown Fox Jumps Over The Lazy Dog')
8
+ if (!(result && result.groups)) {
9
+ throw 'Unexpected Result'
10
+ }
11
+ assertType<number>(result.index)
12
+ assertType<string>(result.input)
13
+ assertType<string | undefined>(result.groups['verb'])
14
+ }
15
+
16
+ function test_matchTypes() {
17
+ const re = new RE2('quick\\s(brown).+?(?<verb>jumps)', 'ig');
18
+ const result = re.match('The Quick Brown Fox Jumps Over The Lazy Dog')
19
+ if (!(result && result.index && result.input && result.groups)) {
20
+ throw 'Unexpected Result'
21
+ }
22
+ assertType<number>(result.index)
23
+ assertType<string>(result.input)
24
+ assertType<string | undefined>(result.groups['verb'])
25
+ }
26
+
27
+ test_execTypes()
28
+ test_matchTypes()
package/tsconfig.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "compilerOptions": {
3
+ "noEmit": true,
4
+ "declaration": true,
5
+ "esModuleInterop": true,
6
+ "strict": true,
7
+ "allowUnusedLabels": false,
8
+ "allowUnreachableCode": false,
9
+ "exactOptionalPropertyTypes": true,
10
+ "noFallthroughCasesInSwitch": true,
11
+ "noImplicitOverride": true,
12
+ "noImplicitReturns": true,
13
+ "noPropertyAccessFromIndexSignature": true,
14
+ "noUncheckedIndexedAccess": true,
15
+ "noUnusedLocals": true,
16
+ "noUnusedParameters": true,
17
+
18
+ },
19
+ "include": ["**/*.ts"]
20
+ }
@@ -0,0 +1,44 @@
1
+ // Copyright 2023 The RE2 Authors. All Rights Reserved.
2
+ // Use of this source code is governed by a BSD-style
3
+ // license that can be found in the LICENSE file.
4
+
5
+ #include "re2/bitmap256.h"
6
+
7
+ #include <stdint.h>
8
+
9
+ #include "util/util.h"
10
+ #include "util/logging.h"
11
+
12
+ namespace re2 {
13
+
14
+ int Bitmap256::FindNextSetBit(int c) const {
15
+ DCHECK_GE(c, 0);
16
+ DCHECK_LE(c, 255);
17
+
18
+ // Check the word that contains the bit. Mask out any lower bits.
19
+ int i = c / 64;
20
+ uint64_t word = words_[i] & (~uint64_t{0} << (c % 64));
21
+ if (word != 0)
22
+ return (i * 64) + FindLSBSet(word);
23
+
24
+ // Check any following words.
25
+ i++;
26
+ switch (i) {
27
+ case 1:
28
+ if (words_[1] != 0)
29
+ return (1 * 64) + FindLSBSet(words_[1]);
30
+ FALLTHROUGH_INTENDED;
31
+ case 2:
32
+ if (words_[2] != 0)
33
+ return (2 * 64) + FindLSBSet(words_[2]);
34
+ FALLTHROUGH_INTENDED;
35
+ case 3:
36
+ if (words_[3] != 0)
37
+ return (3 * 64) + FindLSBSet(words_[3]);
38
+ FALLTHROUGH_INTENDED;
39
+ default:
40
+ return -1;
41
+ }
42
+ }
43
+
44
+ } // namespace re2
@@ -11,7 +11,6 @@
11
11
  #include <stdint.h>
12
12
  #include <string.h>
13
13
 
14
- #include "util/util.h"
15
14
  #include "util/logging.h"
16
15
 
17
16
  namespace re2 {
@@ -82,36 +81,6 @@ class Bitmap256 {
82
81
  uint64_t words_[4];
83
82
  };
84
83
 
85
- int Bitmap256::FindNextSetBit(int c) const {
86
- DCHECK_GE(c, 0);
87
- DCHECK_LE(c, 255);
88
-
89
- // Check the word that contains the bit. Mask out any lower bits.
90
- int i = c / 64;
91
- uint64_t word = words_[i] & (~uint64_t{0} << (c % 64));
92
- if (word != 0)
93
- return (i * 64) + FindLSBSet(word);
94
-
95
- // Check any following words.
96
- i++;
97
- switch (i) {
98
- case 1:
99
- if (words_[1] != 0)
100
- return (1 * 64) + FindLSBSet(words_[1]);
101
- FALLTHROUGH_INTENDED;
102
- case 2:
103
- if (words_[2] != 0)
104
- return (2 * 64) + FindLSBSet(words_[2]);
105
- FALLTHROUGH_INTENDED;
106
- case 3:
107
- if (words_[3] != 0)
108
- return (3 * 64) + FindLSBSet(words_[3]);
109
- FALLTHROUGH_INTENDED;
110
- default:
111
- return -1;
112
- }
113
- }
114
-
115
84
  } // namespace re2
116
85
 
117
86
  #endif // RE2_BITMAP256_H_
@@ -789,8 +789,8 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
789
789
  // Should not be called.
790
790
  Frag Compiler::Copy(Frag arg) {
791
791
  // We're using WalkExponential; there should be no copying.
792
- LOG(DFATAL) << "Compiler::Copy called!";
793
792
  failed_ = true;
793
+ LOG(DFATAL) << "Compiler::Copy called!";
794
794
  return NoMatch();
795
795
  }
796
796
 
@@ -916,8 +916,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
916
916
  CharClass* cc = re->cc();
917
917
  if (cc->empty()) {
918
918
  // This can't happen.
919
- LOG(DFATAL) << "No ranges in char class";
920
919
  failed_ = true;
920
+ LOG(DFATAL) << "No ranges in char class";
921
921
  return NoMatch();
922
922
  }
923
923
 
@@ -974,8 +974,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
974
974
  case kRegexpNoWordBoundary:
975
975
  return EmptyWidth(kEmptyNonWordBoundary);
976
976
  }
977
- LOG(DFATAL) << "Missing case in Compiler: " << re->op();
978
977
  failed_ = true;
978
+ LOG(DFATAL) << "Missing case in Compiler: " << re->op();
979
979
  return NoMatch();
980
980
  }
981
981
 
package/vendor/re2/dfa.cc CHANGED
@@ -1675,8 +1675,8 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
1675
1675
  if (!AnalyzeSearchHelper(params, info, flags)) {
1676
1676
  ResetCache(params->cache_lock);
1677
1677
  if (!AnalyzeSearchHelper(params, info, flags)) {
1678
- LOG(DFATAL) << "Failed to analyze start state.";
1679
1678
  params->failed = true;
1679
+ LOG(DFATAL) << "Failed to analyze start state.";
1680
1680
  return false;
1681
1681
  }
1682
1682
  }
@@ -9,8 +9,10 @@
9
9
  #include <string>
10
10
  #include <vector>
11
11
 
12
+ #include "re2/filtered_re2.h"
12
13
  #include "re2/re2.h"
13
14
  #include "re2/regexp.h"
15
+ #include "re2/set.h"
14
16
  #include "re2/walker-inl.h"
15
17
 
16
18
  using re2::StringPiece;
@@ -96,7 +98,7 @@ class SubstringWalker : public re2::Regexp::Walker<int> {
96
98
  };
97
99
 
98
100
  void TestOneInput(StringPiece pattern, const RE2::Options& options,
99
- StringPiece text) {
101
+ RE2::Anchor anchor, StringPiece text) {
100
102
  // Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
101
103
  // Otherwise, we will waste time on inputs that have long runs of various
102
104
  // character classes. The fuzzer has shown itself to be easily capable of
@@ -131,6 +133,9 @@ void TestOneInput(StringPiece pattern, const RE2::Options& options,
131
133
  if (backslash_p > 1)
132
134
  return;
133
135
 
136
+ // Iterate just once when fuzzing. Otherwise, we easily get bogged down
137
+ // and coverage is unlikely to improve despite significant expense.
138
+ RE2::FUZZING_ONLY_set_maximum_global_replace_count(1);
134
139
  // The default is 1000. Even 100 turned out to be too generous
135
140
  // for fuzzing, empirically speaking, so let's try 10 instead.
136
141
  re2::Regexp::FUZZING_ONLY_set_maximum_repeat_count(10);
@@ -206,6 +211,29 @@ void TestOneInput(StringPiece pattern, const RE2::Options& options,
206
211
  dummy += re.NamedCapturingGroups().size();
207
212
  dummy += re.CapturingGroupNames().size();
208
213
  dummy += RE2::QuoteMeta(pattern).size();
214
+
215
+ RE2::Set set(options, anchor);
216
+ int index = set.Add(pattern, /*error=*/NULL); // -1 on error
217
+ if (index != -1 && set.Compile()) {
218
+ std::vector<int> matches;
219
+ set.Match(text, &matches);
220
+ }
221
+
222
+ re2::FilteredRE2 filter;
223
+ index = -1; // not clobbered on error
224
+ filter.Add(pattern, options, &index);
225
+ if (index != -1) {
226
+ std::vector<std::string> atoms;
227
+ filter.Compile(&atoms);
228
+ // Pretend that all atoms match, which
229
+ // triggers the AND-OR tree maximally.
230
+ std::vector<int> matched_atoms;
231
+ matched_atoms.reserve(atoms.size());
232
+ for (size_t i = 0; i < atoms.size(); ++i)
233
+ matched_atoms.push_back(static_cast<int>(i));
234
+ std::vector<int> matches;
235
+ filter.AllMatches(text, matched_atoms, &matches);
236
+ }
209
237
  }
210
238
 
211
239
  // Entry point for libFuzzer.
@@ -239,9 +267,17 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
239
267
  options.set_word_boundary(fdp.ConsumeBool());
240
268
  options.set_one_line(fdp.ConsumeBool());
241
269
 
270
+ // ConsumeEnum<RE2::Anchor>() would require RE2::Anchor to specify
271
+ // kMaxValue, so just use PickValueInArray<RE2::Anchor>() instead.
272
+ RE2::Anchor anchor = fdp.PickValueInArray<RE2::Anchor>({
273
+ RE2::UNANCHORED,
274
+ RE2::ANCHOR_START,
275
+ RE2::ANCHOR_BOTH,
276
+ });
277
+
242
278
  std::string pattern = fdp.ConsumeRandomLengthString(999);
243
279
  std::string text = fdp.ConsumeRandomLengthString(999);
244
280
 
245
- TestOneInput(pattern, options, text);
281
+ TestOneInput(pattern, options, anchor, text);
246
282
  return 0;
247
283
  }
@@ -1589,8 +1589,6 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
1589
1589
  // return true;
1590
1590
  }
1591
1591
 
1592
- LOG(DFATAL) << "Not reached in ParseEscape.";
1593
-
1594
1592
  BadEscape:
1595
1593
  // Unrecognized escape sequence.
1596
1594
  status->set_code(kRegexpBadEscape);
@@ -2059,8 +2057,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
2059
2057
 
2060
2058
  // Caller is supposed to check this.
2061
2059
  if (!(flags_ & PerlX) || t.size() < 2 || t[0] != '(' || t[1] != '?') {
2062
- LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
2063
2060
  status_->set_code(kRegexpInternalError);
2061
+ LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
2064
2062
  return false;
2065
2063
  }
2066
2064
 
@@ -7,6 +7,7 @@
7
7
  #include <stddef.h>
8
8
  #include <stdint.h>
9
9
  #include <string>
10
+ #include <utility>
10
11
  #include <vector>
11
12
 
12
13
  #include "util/util.h"
@@ -21,9 +22,6 @@ namespace re2 {
21
22
 
22
23
  static const bool ExtraDebug = false;
23
24
 
24
- typedef std::set<std::string>::iterator SSIter;
25
- typedef std::set<std::string>::const_iterator ConstSSIter;
26
-
27
25
  // Initializes a Prefilter, allocating subs_ as necessary.
28
26
  Prefilter::Prefilter(Op op) {
29
27
  op_ = op;
@@ -140,7 +138,7 @@ Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {
140
138
  return AndOr(OR, a, b);
141
139
  }
142
140
 
143
- static void SimplifyStringSet(std::set<std::string>* ss) {
141
+ void Prefilter::SimplifyStringSet(SSet* ss) {
144
142
  // Now make sure that the strings aren't redundant. For example, if
145
143
  // we know "ab" is a required string, then it doesn't help at all to
146
144
  // know that "abc" is also a required string, so delete "abc". This
@@ -149,13 +147,19 @@ static void SimplifyStringSet(std::set<std::string>* ss) {
149
147
  // candidate for match, so further matching "abc" is redundant.
150
148
  // Note that we must ignore "" because find() would find it at the
151
149
  // start of everything and thus we would end up erasing everything.
152
- for (SSIter i = ss->begin(); i != ss->end(); ++i) {
153
- if (i->empty())
154
- continue;
150
+ //
151
+ // The SSet sorts strings by length, then lexicographically. Note that
152
+ // smaller strings appear first and all strings must be unique. These
153
+ // observations let us skip string comparisons when possible.
154
+ SSIter i = ss->begin();
155
+ if (i != ss->end() && i->empty()) {
156
+ ++i;
157
+ }
158
+ for (; i != ss->end(); ++i) {
155
159
  SSIter j = i;
156
160
  ++j;
157
161
  while (j != ss->end()) {
158
- if (j->find(*i) != std::string::npos) {
162
+ if (j->size() > i->size() && j->find(*i) != std::string::npos) {
159
163
  j = ss->erase(j);
160
164
  continue;
161
165
  }
@@ -164,7 +168,7 @@ static void SimplifyStringSet(std::set<std::string>* ss) {
164
168
  }
165
169
  }
166
170
 
167
- Prefilter* Prefilter::OrStrings(std::set<std::string>* ss) {
171
+ Prefilter* Prefilter::OrStrings(SSet* ss) {
168
172
  Prefilter* or_prefilter = new Prefilter(NONE);
169
173
  SimplifyStringSet(ss);
170
174
  for (SSIter i = ss->begin(); i != ss->end(); ++i)
@@ -226,14 +230,14 @@ class Prefilter::Info {
226
230
  // Caller takes ownership of the Prefilter.
227
231
  Prefilter* TakeMatch();
228
232
 
229
- std::set<std::string>& exact() { return exact_; }
233
+ SSet& exact() { return exact_; }
230
234
 
231
235
  bool is_exact() const { return is_exact_; }
232
236
 
233
237
  class Walker;
234
238
 
235
239
  private:
236
- std::set<std::string> exact_;
240
+ SSet exact_;
237
241
 
238
242
  // When is_exact_ is true, the strings that match
239
243
  // are placed in exact_. When it is no longer an exact
@@ -286,18 +290,7 @@ std::string Prefilter::Info::ToString() {
286
290
  return "";
287
291
  }
288
292
 
289
- // Add the strings from src to dst.
290
- static void CopyIn(const std::set<std::string>& src,
291
- std::set<std::string>* dst) {
292
- for (ConstSSIter i = src.begin(); i != src.end(); ++i)
293
- dst->insert(*i);
294
- }
295
-
296
- // Add the cross-product of a and b to dst.
297
- // (For each string i in a and j in b, add i+j.)
298
- static void CrossProduct(const std::set<std::string>& a,
299
- const std::set<std::string>& b,
300
- std::set<std::string>* dst) {
293
+ void Prefilter::CrossProduct(const SSet& a, const SSet& b, SSet* dst) {
301
294
  for (ConstSSIter i = a.begin(); i != a.end(); ++i)
302
295
  for (ConstSSIter j = b.begin(); j != b.end(); ++j)
303
296
  dst->insert(*i + *j);
@@ -343,8 +336,14 @@ Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) {
343
336
  Info *ab = new Info();
344
337
 
345
338
  if (a->is_exact_ && b->is_exact_) {
346
- CopyIn(a->exact_, &ab->exact_);
347
- CopyIn(b->exact_, &ab->exact_);
339
+ // Avoid string copies by moving the larger exact_ set into
340
+ // ab directly, then merge in the smaller set.
341
+ if (a->exact_.size() < b->exact_.size()) {
342
+ using std::swap;
343
+ swap(a, b);
344
+ }
345
+ ab->exact_ = std::move(a->exact_);
346
+ ab->exact_.insert(b->exact_.begin(), b->exact_.end());
348
347
  ab->is_exact_ = true;
349
348
  } else {
350
349
  // Either a or b has is_exact_ = false. If the other
@@ -532,8 +531,8 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
532
531
  switch (re->op()) {
533
532
  default:
534
533
  case kRegexpRepeat:
535
- LOG(DFATAL) << "Bad regexp op " << re->op();
536
534
  info = EmptyString();
535
+ LOG(DFATAL) << "Bad regexp op " << re->op();
537
536
  break;
538
537
 
539
538
  case kRegexpNoMatch: