re2 1.17.8 → 1.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/actions/{linux-alpine-node-14 → linux-alpine-node-20}/Dockerfile +1 -1
- package/.github/actions/linux-alpine-node-20/action.yml +7 -0
- package/.github/actions/linux-node-12/Dockerfile +1 -1
- package/.github/actions/{linux-node-19 → linux-node-20}/Dockerfile +1 -1
- package/.github/actions/{linux-node-19 → linux-node-20}/action.yml +3 -3
- package/.github/workflows/build.yml +15 -35
- package/.github/workflows/tests.yml +2 -5
- package/README.md +2 -0
- package/binding.gyp +1 -0
- package/package.json +7 -4
- package/re2.d.ts +19 -15
- package/ts-tests/test-types.ts +28 -0
- package/tsconfig.json +20 -0
- package/vendor/re2/bitmap256.cc +44 -0
- package/vendor/re2/bitmap256.h +0 -31
- package/vendor/re2/compile.cc +3 -3
- package/vendor/re2/dfa.cc +1 -1
- package/vendor/re2/fuzzing/re2_fuzzer.cc +38 -2
- package/vendor/re2/parse.cc +1 -3
- package/vendor/re2/prefilter.cc +25 -26
- package/vendor/re2/prefilter.h +23 -1
- package/vendor/re2/prog.cc +1 -1
- package/vendor/re2/re2.cc +77 -47
- package/vendor/re2/re2.h +49 -35
- package/vendor/re2/regexp.cc +24 -14
- package/vendor/re2/set.cc +2 -2
- package/vendor/re2/simplify.cc +2 -2
- package/vendor/re2/testing/filtered_re2_test.cc +2 -1
- package/vendor/re2/unicode.py +2 -2
- package/vendor/re2/unicode_groups.cc +150 -75
- package/vendor/util/fuzz.cc +4 -4
- package/vendor/util/mutex.h +18 -2
- package/vendor/util/pcre.h +1 -1
- package/vendor/util/rune.cc +4 -4
- package/.github/actions/linux-alpine-node-14/action.yml +0 -7
- package/.github/actions/linux-alpine-node-14/entrypoint.sh +0 -8
- package/.github/actions/linux-alpine-node-19/Dockerfile +0 -6
- package/.github/actions/linux-alpine-node-19/action.yml +0 -7
- /package/.github/actions/{linux-alpine-node-19 → linux-alpine-node-20}/entrypoint.sh +0 -0
- /package/.github/actions/{linux-node-19 → linux-node-20}/entrypoint.sh +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
FROM centos:centos7
|
|
2
2
|
|
|
3
3
|
RUN yum install -y centos-release-scl && \
|
|
4
|
-
INSTALL_PKGS="devtoolset-8 python3 make" && \
|
|
4
|
+
INSTALL_PKGS="devtoolset-8 python3 make git" && \
|
|
5
5
|
yum install -y --setopt=tsflags=nodocs $INSTALL_PKGS && \
|
|
6
6
|
rpm -V $INSTALL_PKGS && \
|
|
7
7
|
yum -y clean all --enablerepo='*'
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
name: 'Create a binary artifact for Node
|
|
2
|
-
description: 'Create a binary artifact for Node
|
|
1
|
+
name: 'Create a binary artifact for Node 20 on Linux'
|
|
2
|
+
description: 'Create a binary artifact for Node 20 on Linux using node:20-buster'
|
|
3
3
|
inputs:
|
|
4
4
|
node-version:
|
|
5
5
|
description: 'Node.js version'
|
|
6
6
|
required: false
|
|
7
|
-
default: '
|
|
7
|
+
default: '20'
|
|
8
8
|
runs:
|
|
9
9
|
using: 'docker'
|
|
10
10
|
image: 'Dockerfile'
|
|
@@ -13,15 +13,13 @@ jobs:
|
|
|
13
13
|
runs-on: ubuntu-latest
|
|
14
14
|
|
|
15
15
|
steps:
|
|
16
|
-
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
draft: false
|
|
24
|
-
prerelease: false
|
|
16
|
+
- uses: actions/checkout@v3
|
|
17
|
+
- env:
|
|
18
|
+
GH_TOKEN: ${{github.token}}
|
|
19
|
+
run: |
|
|
20
|
+
REF=${{github.ref}}
|
|
21
|
+
TAG=${REF#"refs/tags/"}
|
|
22
|
+
gh release create -t "Release ${TAG}" -n "" "${{github.ref}}"
|
|
25
23
|
|
|
26
24
|
build:
|
|
27
25
|
name: Node.js ${{matrix.node-version}} on ${{matrix.os}}
|
|
@@ -31,10 +29,7 @@ jobs:
|
|
|
31
29
|
strategy:
|
|
32
30
|
matrix:
|
|
33
31
|
os: [macOS-latest, windows-latest]
|
|
34
|
-
node-version: [
|
|
35
|
-
exclude:
|
|
36
|
-
- os: windows-latest
|
|
37
|
-
node-version: 14
|
|
32
|
+
node-version: [16, 18, 20]
|
|
38
33
|
|
|
39
34
|
steps:
|
|
40
35
|
- uses: actions/checkout@v3
|
|
@@ -64,7 +59,7 @@ jobs:
|
|
|
64
59
|
|
|
65
60
|
strategy:
|
|
66
61
|
matrix:
|
|
67
|
-
node-version: [
|
|
62
|
+
node-version: [16]
|
|
68
63
|
|
|
69
64
|
steps:
|
|
70
65
|
- uses: actions/checkout@v3
|
|
@@ -92,23 +87,8 @@ jobs:
|
|
|
92
87
|
env:
|
|
93
88
|
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
|
94
89
|
|
|
95
|
-
build-linux-node-
|
|
96
|
-
name: Node.js
|
|
97
|
-
needs: create-release
|
|
98
|
-
runs-on: ubuntu-latest
|
|
99
|
-
continue-on-error: true
|
|
100
|
-
|
|
101
|
-
steps:
|
|
102
|
-
- uses: actions/checkout@v3
|
|
103
|
-
with:
|
|
104
|
-
submodules: true
|
|
105
|
-
- name: Install, test, and create artifact
|
|
106
|
-
uses: ./.github/actions/linux-node-19/
|
|
107
|
-
env:
|
|
108
|
-
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
|
109
|
-
|
|
110
|
-
build-linux-alpine-node-14:
|
|
111
|
-
name: Node.js 14 on Alpine Linux
|
|
90
|
+
build-linux-node-20:
|
|
91
|
+
name: Node.js 20 on Debian Buster
|
|
112
92
|
needs: create-release
|
|
113
93
|
runs-on: ubuntu-latest
|
|
114
94
|
continue-on-error: true
|
|
@@ -118,7 +98,7 @@ jobs:
|
|
|
118
98
|
with:
|
|
119
99
|
submodules: true
|
|
120
100
|
- name: Install, test, and create artifact
|
|
121
|
-
uses: ./.github/actions/linux-
|
|
101
|
+
uses: ./.github/actions/linux-node-20/
|
|
122
102
|
env:
|
|
123
103
|
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
|
124
104
|
|
|
@@ -152,8 +132,8 @@ jobs:
|
|
|
152
132
|
env:
|
|
153
133
|
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
|
154
134
|
|
|
155
|
-
build-linux-alpine-node-
|
|
156
|
-
name: Node.js
|
|
135
|
+
build-linux-alpine-node-20:
|
|
136
|
+
name: Node.js 20 on Alpine Linux
|
|
157
137
|
needs: create-release
|
|
158
138
|
runs-on: ubuntu-latest
|
|
159
139
|
continue-on-error: true
|
|
@@ -163,6 +143,6 @@ jobs:
|
|
|
163
143
|
with:
|
|
164
144
|
submodules: true
|
|
165
145
|
- name: Install, test, and create artifact
|
|
166
|
-
uses: ./.github/actions/linux-alpine-node-
|
|
146
|
+
uses: ./.github/actions/linux-alpine-node-20/
|
|
167
147
|
env:
|
|
168
148
|
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
|
@@ -14,10 +14,7 @@ jobs:
|
|
|
14
14
|
strategy:
|
|
15
15
|
matrix:
|
|
16
16
|
os: [ubuntu-latest, macOS-latest, windows-latest]
|
|
17
|
-
node-version: [
|
|
18
|
-
exclude:
|
|
19
|
-
- os: windows-latest
|
|
20
|
-
node-version: 14
|
|
17
|
+
node-version: [16, 18, 20]
|
|
21
18
|
|
|
22
19
|
steps:
|
|
23
20
|
- uses: actions/checkout@v3
|
|
@@ -33,4 +30,4 @@ jobs:
|
|
|
33
30
|
run: |
|
|
34
31
|
npm i
|
|
35
32
|
npm run build --if-present
|
|
36
|
-
npm test
|
|
33
|
+
npm test && npm run ts-test
|
package/README.md
CHANGED
|
@@ -352,6 +352,8 @@ console.log('re2_res : ' + re2_res); // prints: re2_res : abc,a,b,c
|
|
|
352
352
|
|
|
353
353
|
## Release history
|
|
354
354
|
|
|
355
|
+
- 1.18.1 *Support for Node 16, 18, 20 + Darwin arm64 precompiled binaries.*
|
|
356
|
+
- 1.18.0 *Modified TS bindings, added a type test (thx, [Kenichi Kamiya](https://github.com/kachick) and [Jamie Magee](https://github.com/JamieMagee)).*
|
|
355
357
|
- 1.17.8 *Updated deps, added Node 19 as a pre-compilation target.*
|
|
356
358
|
- 1.17.7 *Added support for a cross-platform fetching of a pre-compiled version by updating [install-artifact-from-github](https://github.com/uhop/install-artifact-from-github).*
|
|
357
359
|
- 1.17.6 *Implemented `dotAll`. Thx [Michael Kriese](https://github.com/viceice).*
|
package/binding.gyp
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "re2",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.18.1",
|
|
4
4
|
"description": "Bindings for RE2: fast, safe alternative to backtracking regular expression engines.",
|
|
5
5
|
"homepage": "https://github.com/uhop/node-re2",
|
|
6
6
|
"bugs": "https://github.com/uhop/node-re2/issues",
|
|
@@ -11,15 +11,18 @@
|
|
|
11
11
|
"test": "tests"
|
|
12
12
|
},
|
|
13
13
|
"dependencies": {
|
|
14
|
-
"install-artifact-from-github": "^1.3.
|
|
14
|
+
"install-artifact-from-github": "^1.3.3",
|
|
15
15
|
"nan": "^2.17.0",
|
|
16
|
-
"node-gyp": "^9.3.
|
|
16
|
+
"node-gyp": "^9.3.1"
|
|
17
17
|
},
|
|
18
18
|
"devDependencies": {
|
|
19
|
-
"
|
|
19
|
+
"@types/node": "^20.2.3",
|
|
20
|
+
"heya-unit": "^0.3.0",
|
|
21
|
+
"typescript": "^5.0.4"
|
|
20
22
|
},
|
|
21
23
|
"scripts": {
|
|
22
24
|
"test": "node tests/tests.js",
|
|
25
|
+
"ts-test": "tsc",
|
|
23
26
|
"save-to-github": "save-to-github-cache --artifact build/Release/re2.node",
|
|
24
27
|
"install": "install-from-cache --artifact build/Release/re2.node --host-var RE2_DOWNLOAD_MIRROR --skip-path-var RE2_DOWNLOAD_SKIP_PATH --skip-ver-var RE2_DOWNLOAD_SKIP_VER || npm run rebuild",
|
|
25
28
|
"verify-build": "node scripts/verify-build.js",
|
package/re2.d.ts
CHANGED
|
@@ -1,27 +1,31 @@
|
|
|
1
1
|
declare module 're2' {
|
|
2
2
|
|
|
3
|
-
interface
|
|
4
|
-
index
|
|
5
|
-
input
|
|
3
|
+
interface RE2BufferExecArray {
|
|
4
|
+
index: number;
|
|
5
|
+
input: Buffer;
|
|
6
|
+
0: Buffer;
|
|
6
7
|
groups?: {
|
|
7
|
-
[key: string]:
|
|
8
|
+
[key: string]: Buffer
|
|
8
9
|
}
|
|
9
10
|
}
|
|
10
11
|
|
|
11
|
-
interface
|
|
12
|
-
index
|
|
13
|
-
input
|
|
12
|
+
interface RE2BufferMatchArray {
|
|
13
|
+
index?: number;
|
|
14
|
+
input?: Buffer;
|
|
15
|
+
0: Buffer;
|
|
14
16
|
groups?: {
|
|
15
|
-
[key: string]:
|
|
17
|
+
[key: string]: Buffer
|
|
16
18
|
}
|
|
17
19
|
}
|
|
18
20
|
|
|
19
21
|
interface RE2 extends RegExp {
|
|
20
|
-
exec
|
|
22
|
+
exec(str: string): RegExpExecArray | null;
|
|
23
|
+
exec(str: Buffer): RE2BufferExecArray | null;
|
|
21
24
|
|
|
22
|
-
|
|
25
|
+
match(str: string): RegExpMatchArray | null;
|
|
26
|
+
match(str: Buffer): RE2BufferMatchArray | null;
|
|
23
27
|
|
|
24
|
-
|
|
28
|
+
test(str: string | Buffer): boolean;
|
|
25
29
|
|
|
26
30
|
replace<K extends String | Buffer>(str: K, replaceValue: string | Buffer): K;
|
|
27
31
|
replace<K extends String | Buffer>(str: K, replacer: (substring: string, ...args: any[]) => string | Buffer): K;
|
|
@@ -32,10 +36,10 @@ declare module 're2' {
|
|
|
32
36
|
}
|
|
33
37
|
|
|
34
38
|
interface RE2Constructor extends RegExpConstructor {
|
|
35
|
-
new(pattern: Buffer | RegExp | string): RE2;
|
|
36
|
-
new(pattern: Buffer | string, flags?: string): RE2;
|
|
37
|
-
(pattern: Buffer | RegExp | string): RE2;
|
|
38
|
-
(pattern: Buffer | string, flags?: string): RE2;
|
|
39
|
+
new(pattern: Buffer | RegExp | RE2 | string): RE2;
|
|
40
|
+
new(pattern: Buffer | string, flags?: string | Buffer): RE2;
|
|
41
|
+
(pattern: Buffer | RegExp | RE2 | string): RE2;
|
|
42
|
+
(pattern: Buffer | string, flags?: string | Buffer): RE2;
|
|
39
43
|
readonly prototype: RE2;
|
|
40
44
|
|
|
41
45
|
unicodeWarningLevel: 'nothing' | 'warnOnce' | 'warn' | 'throw';
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import RE2 from 're2';
|
|
2
|
+
|
|
3
|
+
function assertType<T>(_val: T) {}
|
|
4
|
+
|
|
5
|
+
function test_execTypes() {
|
|
6
|
+
const re = new RE2('quick\\s(brown).+?(?<verb>jumps)', 'ig');
|
|
7
|
+
const result = re.exec('The Quick Brown Fox Jumps Over The Lazy Dog')
|
|
8
|
+
if (!(result && result.groups)) {
|
|
9
|
+
throw 'Unexpected Result'
|
|
10
|
+
}
|
|
11
|
+
assertType<number>(result.index)
|
|
12
|
+
assertType<string>(result.input)
|
|
13
|
+
assertType<string | undefined>(result.groups['verb'])
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
function test_matchTypes() {
|
|
17
|
+
const re = new RE2('quick\\s(brown).+?(?<verb>jumps)', 'ig');
|
|
18
|
+
const result = re.match('The Quick Brown Fox Jumps Over The Lazy Dog')
|
|
19
|
+
if (!(result && result.index && result.input && result.groups)) {
|
|
20
|
+
throw 'Unexpected Result'
|
|
21
|
+
}
|
|
22
|
+
assertType<number>(result.index)
|
|
23
|
+
assertType<string>(result.input)
|
|
24
|
+
assertType<string | undefined>(result.groups['verb'])
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
test_execTypes()
|
|
28
|
+
test_matchTypes()
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"noEmit": true,
|
|
4
|
+
"declaration": true,
|
|
5
|
+
"esModuleInterop": true,
|
|
6
|
+
"strict": true,
|
|
7
|
+
"allowUnusedLabels": false,
|
|
8
|
+
"allowUnreachableCode": false,
|
|
9
|
+
"exactOptionalPropertyTypes": true,
|
|
10
|
+
"noFallthroughCasesInSwitch": true,
|
|
11
|
+
"noImplicitOverride": true,
|
|
12
|
+
"noImplicitReturns": true,
|
|
13
|
+
"noPropertyAccessFromIndexSignature": true,
|
|
14
|
+
"noUncheckedIndexedAccess": true,
|
|
15
|
+
"noUnusedLocals": true,
|
|
16
|
+
"noUnusedParameters": true,
|
|
17
|
+
|
|
18
|
+
},
|
|
19
|
+
"include": ["**/*.ts"]
|
|
20
|
+
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
// Copyright 2023 The RE2 Authors. All Rights Reserved.
|
|
2
|
+
// Use of this source code is governed by a BSD-style
|
|
3
|
+
// license that can be found in the LICENSE file.
|
|
4
|
+
|
|
5
|
+
#include "re2/bitmap256.h"
|
|
6
|
+
|
|
7
|
+
#include <stdint.h>
|
|
8
|
+
|
|
9
|
+
#include "util/util.h"
|
|
10
|
+
#include "util/logging.h"
|
|
11
|
+
|
|
12
|
+
namespace re2 {
|
|
13
|
+
|
|
14
|
+
int Bitmap256::FindNextSetBit(int c) const {
|
|
15
|
+
DCHECK_GE(c, 0);
|
|
16
|
+
DCHECK_LE(c, 255);
|
|
17
|
+
|
|
18
|
+
// Check the word that contains the bit. Mask out any lower bits.
|
|
19
|
+
int i = c / 64;
|
|
20
|
+
uint64_t word = words_[i] & (~uint64_t{0} << (c % 64));
|
|
21
|
+
if (word != 0)
|
|
22
|
+
return (i * 64) + FindLSBSet(word);
|
|
23
|
+
|
|
24
|
+
// Check any following words.
|
|
25
|
+
i++;
|
|
26
|
+
switch (i) {
|
|
27
|
+
case 1:
|
|
28
|
+
if (words_[1] != 0)
|
|
29
|
+
return (1 * 64) + FindLSBSet(words_[1]);
|
|
30
|
+
FALLTHROUGH_INTENDED;
|
|
31
|
+
case 2:
|
|
32
|
+
if (words_[2] != 0)
|
|
33
|
+
return (2 * 64) + FindLSBSet(words_[2]);
|
|
34
|
+
FALLTHROUGH_INTENDED;
|
|
35
|
+
case 3:
|
|
36
|
+
if (words_[3] != 0)
|
|
37
|
+
return (3 * 64) + FindLSBSet(words_[3]);
|
|
38
|
+
FALLTHROUGH_INTENDED;
|
|
39
|
+
default:
|
|
40
|
+
return -1;
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
} // namespace re2
|
package/vendor/re2/bitmap256.h
CHANGED
|
@@ -11,7 +11,6 @@
|
|
|
11
11
|
#include <stdint.h>
|
|
12
12
|
#include <string.h>
|
|
13
13
|
|
|
14
|
-
#include "util/util.h"
|
|
15
14
|
#include "util/logging.h"
|
|
16
15
|
|
|
17
16
|
namespace re2 {
|
|
@@ -82,36 +81,6 @@ class Bitmap256 {
|
|
|
82
81
|
uint64_t words_[4];
|
|
83
82
|
};
|
|
84
83
|
|
|
85
|
-
int Bitmap256::FindNextSetBit(int c) const {
|
|
86
|
-
DCHECK_GE(c, 0);
|
|
87
|
-
DCHECK_LE(c, 255);
|
|
88
|
-
|
|
89
|
-
// Check the word that contains the bit. Mask out any lower bits.
|
|
90
|
-
int i = c / 64;
|
|
91
|
-
uint64_t word = words_[i] & (~uint64_t{0} << (c % 64));
|
|
92
|
-
if (word != 0)
|
|
93
|
-
return (i * 64) + FindLSBSet(word);
|
|
94
|
-
|
|
95
|
-
// Check any following words.
|
|
96
|
-
i++;
|
|
97
|
-
switch (i) {
|
|
98
|
-
case 1:
|
|
99
|
-
if (words_[1] != 0)
|
|
100
|
-
return (1 * 64) + FindLSBSet(words_[1]);
|
|
101
|
-
FALLTHROUGH_INTENDED;
|
|
102
|
-
case 2:
|
|
103
|
-
if (words_[2] != 0)
|
|
104
|
-
return (2 * 64) + FindLSBSet(words_[2]);
|
|
105
|
-
FALLTHROUGH_INTENDED;
|
|
106
|
-
case 3:
|
|
107
|
-
if (words_[3] != 0)
|
|
108
|
-
return (3 * 64) + FindLSBSet(words_[3]);
|
|
109
|
-
FALLTHROUGH_INTENDED;
|
|
110
|
-
default:
|
|
111
|
-
return -1;
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
|
|
115
84
|
} // namespace re2
|
|
116
85
|
|
|
117
86
|
#endif // RE2_BITMAP256_H_
|
package/vendor/re2/compile.cc
CHANGED
|
@@ -789,8 +789,8 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
|
|
|
789
789
|
// Should not be called.
|
|
790
790
|
Frag Compiler::Copy(Frag arg) {
|
|
791
791
|
// We're using WalkExponential; there should be no copying.
|
|
792
|
-
LOG(DFATAL) << "Compiler::Copy called!";
|
|
793
792
|
failed_ = true;
|
|
793
|
+
LOG(DFATAL) << "Compiler::Copy called!";
|
|
794
794
|
return NoMatch();
|
|
795
795
|
}
|
|
796
796
|
|
|
@@ -916,8 +916,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
|
|
|
916
916
|
CharClass* cc = re->cc();
|
|
917
917
|
if (cc->empty()) {
|
|
918
918
|
// This can't happen.
|
|
919
|
-
LOG(DFATAL) << "No ranges in char class";
|
|
920
919
|
failed_ = true;
|
|
920
|
+
LOG(DFATAL) << "No ranges in char class";
|
|
921
921
|
return NoMatch();
|
|
922
922
|
}
|
|
923
923
|
|
|
@@ -974,8 +974,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
|
|
|
974
974
|
case kRegexpNoWordBoundary:
|
|
975
975
|
return EmptyWidth(kEmptyNonWordBoundary);
|
|
976
976
|
}
|
|
977
|
-
LOG(DFATAL) << "Missing case in Compiler: " << re->op();
|
|
978
977
|
failed_ = true;
|
|
978
|
+
LOG(DFATAL) << "Missing case in Compiler: " << re->op();
|
|
979
979
|
return NoMatch();
|
|
980
980
|
}
|
|
981
981
|
|
package/vendor/re2/dfa.cc
CHANGED
|
@@ -1675,8 +1675,8 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
|
|
|
1675
1675
|
if (!AnalyzeSearchHelper(params, info, flags)) {
|
|
1676
1676
|
ResetCache(params->cache_lock);
|
|
1677
1677
|
if (!AnalyzeSearchHelper(params, info, flags)) {
|
|
1678
|
-
LOG(DFATAL) << "Failed to analyze start state.";
|
|
1679
1678
|
params->failed = true;
|
|
1679
|
+
LOG(DFATAL) << "Failed to analyze start state.";
|
|
1680
1680
|
return false;
|
|
1681
1681
|
}
|
|
1682
1682
|
}
|
|
@@ -9,8 +9,10 @@
|
|
|
9
9
|
#include <string>
|
|
10
10
|
#include <vector>
|
|
11
11
|
|
|
12
|
+
#include "re2/filtered_re2.h"
|
|
12
13
|
#include "re2/re2.h"
|
|
13
14
|
#include "re2/regexp.h"
|
|
15
|
+
#include "re2/set.h"
|
|
14
16
|
#include "re2/walker-inl.h"
|
|
15
17
|
|
|
16
18
|
using re2::StringPiece;
|
|
@@ -96,7 +98,7 @@ class SubstringWalker : public re2::Regexp::Walker<int> {
|
|
|
96
98
|
};
|
|
97
99
|
|
|
98
100
|
void TestOneInput(StringPiece pattern, const RE2::Options& options,
|
|
99
|
-
StringPiece text) {
|
|
101
|
+
RE2::Anchor anchor, StringPiece text) {
|
|
100
102
|
// Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
|
|
101
103
|
// Otherwise, we will waste time on inputs that have long runs of various
|
|
102
104
|
// character classes. The fuzzer has shown itself to be easily capable of
|
|
@@ -131,6 +133,9 @@ void TestOneInput(StringPiece pattern, const RE2::Options& options,
|
|
|
131
133
|
if (backslash_p > 1)
|
|
132
134
|
return;
|
|
133
135
|
|
|
136
|
+
// Iterate just once when fuzzing. Otherwise, we easily get bogged down
|
|
137
|
+
// and coverage is unlikely to improve despite significant expense.
|
|
138
|
+
RE2::FUZZING_ONLY_set_maximum_global_replace_count(1);
|
|
134
139
|
// The default is 1000. Even 100 turned out to be too generous
|
|
135
140
|
// for fuzzing, empirically speaking, so let's try 10 instead.
|
|
136
141
|
re2::Regexp::FUZZING_ONLY_set_maximum_repeat_count(10);
|
|
@@ -206,6 +211,29 @@ void TestOneInput(StringPiece pattern, const RE2::Options& options,
|
|
|
206
211
|
dummy += re.NamedCapturingGroups().size();
|
|
207
212
|
dummy += re.CapturingGroupNames().size();
|
|
208
213
|
dummy += RE2::QuoteMeta(pattern).size();
|
|
214
|
+
|
|
215
|
+
RE2::Set set(options, anchor);
|
|
216
|
+
int index = set.Add(pattern, /*error=*/NULL); // -1 on error
|
|
217
|
+
if (index != -1 && set.Compile()) {
|
|
218
|
+
std::vector<int> matches;
|
|
219
|
+
set.Match(text, &matches);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
re2::FilteredRE2 filter;
|
|
223
|
+
index = -1; // not clobbered on error
|
|
224
|
+
filter.Add(pattern, options, &index);
|
|
225
|
+
if (index != -1) {
|
|
226
|
+
std::vector<std::string> atoms;
|
|
227
|
+
filter.Compile(&atoms);
|
|
228
|
+
// Pretend that all atoms match, which
|
|
229
|
+
// triggers the AND-OR tree maximally.
|
|
230
|
+
std::vector<int> matched_atoms;
|
|
231
|
+
matched_atoms.reserve(atoms.size());
|
|
232
|
+
for (size_t i = 0; i < atoms.size(); ++i)
|
|
233
|
+
matched_atoms.push_back(static_cast<int>(i));
|
|
234
|
+
std::vector<int> matches;
|
|
235
|
+
filter.AllMatches(text, matched_atoms, &matches);
|
|
236
|
+
}
|
|
209
237
|
}
|
|
210
238
|
|
|
211
239
|
// Entry point for libFuzzer.
|
|
@@ -239,9 +267,17 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
|
|
239
267
|
options.set_word_boundary(fdp.ConsumeBool());
|
|
240
268
|
options.set_one_line(fdp.ConsumeBool());
|
|
241
269
|
|
|
270
|
+
// ConsumeEnum<RE2::Anchor>() would require RE2::Anchor to specify
|
|
271
|
+
// kMaxValue, so just use PickValueInArray<RE2::Anchor>() instead.
|
|
272
|
+
RE2::Anchor anchor = fdp.PickValueInArray<RE2::Anchor>({
|
|
273
|
+
RE2::UNANCHORED,
|
|
274
|
+
RE2::ANCHOR_START,
|
|
275
|
+
RE2::ANCHOR_BOTH,
|
|
276
|
+
});
|
|
277
|
+
|
|
242
278
|
std::string pattern = fdp.ConsumeRandomLengthString(999);
|
|
243
279
|
std::string text = fdp.ConsumeRandomLengthString(999);
|
|
244
280
|
|
|
245
|
-
TestOneInput(pattern, options, text);
|
|
281
|
+
TestOneInput(pattern, options, anchor, text);
|
|
246
282
|
return 0;
|
|
247
283
|
}
|
package/vendor/re2/parse.cc
CHANGED
|
@@ -1589,8 +1589,6 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
|
|
|
1589
1589
|
// return true;
|
|
1590
1590
|
}
|
|
1591
1591
|
|
|
1592
|
-
LOG(DFATAL) << "Not reached in ParseEscape.";
|
|
1593
|
-
|
|
1594
1592
|
BadEscape:
|
|
1595
1593
|
// Unrecognized escape sequence.
|
|
1596
1594
|
status->set_code(kRegexpBadEscape);
|
|
@@ -2059,8 +2057,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
|
|
|
2059
2057
|
|
|
2060
2058
|
// Caller is supposed to check this.
|
|
2061
2059
|
if (!(flags_ & PerlX) || t.size() < 2 || t[0] != '(' || t[1] != '?') {
|
|
2062
|
-
LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
|
|
2063
2060
|
status_->set_code(kRegexpInternalError);
|
|
2061
|
+
LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
|
|
2064
2062
|
return false;
|
|
2065
2063
|
}
|
|
2066
2064
|
|
package/vendor/re2/prefilter.cc
CHANGED
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
#include <stddef.h>
|
|
8
8
|
#include <stdint.h>
|
|
9
9
|
#include <string>
|
|
10
|
+
#include <utility>
|
|
10
11
|
#include <vector>
|
|
11
12
|
|
|
12
13
|
#include "util/util.h"
|
|
@@ -21,9 +22,6 @@ namespace re2 {
|
|
|
21
22
|
|
|
22
23
|
static const bool ExtraDebug = false;
|
|
23
24
|
|
|
24
|
-
typedef std::set<std::string>::iterator SSIter;
|
|
25
|
-
typedef std::set<std::string>::const_iterator ConstSSIter;
|
|
26
|
-
|
|
27
25
|
// Initializes a Prefilter, allocating subs_ as necessary.
|
|
28
26
|
Prefilter::Prefilter(Op op) {
|
|
29
27
|
op_ = op;
|
|
@@ -140,7 +138,7 @@ Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {
|
|
|
140
138
|
return AndOr(OR, a, b);
|
|
141
139
|
}
|
|
142
140
|
|
|
143
|
-
|
|
141
|
+
void Prefilter::SimplifyStringSet(SSet* ss) {
|
|
144
142
|
// Now make sure that the strings aren't redundant. For example, if
|
|
145
143
|
// we know "ab" is a required string, then it doesn't help at all to
|
|
146
144
|
// know that "abc" is also a required string, so delete "abc". This
|
|
@@ -149,13 +147,19 @@ static void SimplifyStringSet(std::set<std::string>* ss) {
|
|
|
149
147
|
// candidate for match, so further matching "abc" is redundant.
|
|
150
148
|
// Note that we must ignore "" because find() would find it at the
|
|
151
149
|
// start of everything and thus we would end up erasing everything.
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
150
|
+
//
|
|
151
|
+
// The SSet sorts strings by length, then lexicographically. Note that
|
|
152
|
+
// smaller strings appear first and all strings must be unique. These
|
|
153
|
+
// observations let us skip string comparisons when possible.
|
|
154
|
+
SSIter i = ss->begin();
|
|
155
|
+
if (i != ss->end() && i->empty()) {
|
|
156
|
+
++i;
|
|
157
|
+
}
|
|
158
|
+
for (; i != ss->end(); ++i) {
|
|
155
159
|
SSIter j = i;
|
|
156
160
|
++j;
|
|
157
161
|
while (j != ss->end()) {
|
|
158
|
-
if (j->find(*i) != std::string::npos) {
|
|
162
|
+
if (j->size() > i->size() && j->find(*i) != std::string::npos) {
|
|
159
163
|
j = ss->erase(j);
|
|
160
164
|
continue;
|
|
161
165
|
}
|
|
@@ -164,7 +168,7 @@ static void SimplifyStringSet(std::set<std::string>* ss) {
|
|
|
164
168
|
}
|
|
165
169
|
}
|
|
166
170
|
|
|
167
|
-
Prefilter* Prefilter::OrStrings(
|
|
171
|
+
Prefilter* Prefilter::OrStrings(SSet* ss) {
|
|
168
172
|
Prefilter* or_prefilter = new Prefilter(NONE);
|
|
169
173
|
SimplifyStringSet(ss);
|
|
170
174
|
for (SSIter i = ss->begin(); i != ss->end(); ++i)
|
|
@@ -226,14 +230,14 @@ class Prefilter::Info {
|
|
|
226
230
|
// Caller takes ownership of the Prefilter.
|
|
227
231
|
Prefilter* TakeMatch();
|
|
228
232
|
|
|
229
|
-
|
|
233
|
+
SSet& exact() { return exact_; }
|
|
230
234
|
|
|
231
235
|
bool is_exact() const { return is_exact_; }
|
|
232
236
|
|
|
233
237
|
class Walker;
|
|
234
238
|
|
|
235
239
|
private:
|
|
236
|
-
|
|
240
|
+
SSet exact_;
|
|
237
241
|
|
|
238
242
|
// When is_exact_ is true, the strings that match
|
|
239
243
|
// are placed in exact_. When it is no longer an exact
|
|
@@ -286,18 +290,7 @@ std::string Prefilter::Info::ToString() {
|
|
|
286
290
|
return "";
|
|
287
291
|
}
|
|
288
292
|
|
|
289
|
-
|
|
290
|
-
static void CopyIn(const std::set<std::string>& src,
|
|
291
|
-
std::set<std::string>* dst) {
|
|
292
|
-
for (ConstSSIter i = src.begin(); i != src.end(); ++i)
|
|
293
|
-
dst->insert(*i);
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
// Add the cross-product of a and b to dst.
|
|
297
|
-
// (For each string i in a and j in b, add i+j.)
|
|
298
|
-
static void CrossProduct(const std::set<std::string>& a,
|
|
299
|
-
const std::set<std::string>& b,
|
|
300
|
-
std::set<std::string>* dst) {
|
|
293
|
+
void Prefilter::CrossProduct(const SSet& a, const SSet& b, SSet* dst) {
|
|
301
294
|
for (ConstSSIter i = a.begin(); i != a.end(); ++i)
|
|
302
295
|
for (ConstSSIter j = b.begin(); j != b.end(); ++j)
|
|
303
296
|
dst->insert(*i + *j);
|
|
@@ -343,8 +336,14 @@ Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) {
|
|
|
343
336
|
Info *ab = new Info();
|
|
344
337
|
|
|
345
338
|
if (a->is_exact_ && b->is_exact_) {
|
|
346
|
-
|
|
347
|
-
|
|
339
|
+
// Avoid string copies by moving the larger exact_ set into
|
|
340
|
+
// ab directly, then merge in the smaller set.
|
|
341
|
+
if (a->exact_.size() < b->exact_.size()) {
|
|
342
|
+
using std::swap;
|
|
343
|
+
swap(a, b);
|
|
344
|
+
}
|
|
345
|
+
ab->exact_ = std::move(a->exact_);
|
|
346
|
+
ab->exact_.insert(b->exact_.begin(), b->exact_.end());
|
|
348
347
|
ab->is_exact_ = true;
|
|
349
348
|
} else {
|
|
350
349
|
// Either a or b has is_exact_ = false. If the other
|
|
@@ -532,8 +531,8 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
|
|
|
532
531
|
switch (re->op()) {
|
|
533
532
|
default:
|
|
534
533
|
case kRegexpRepeat:
|
|
535
|
-
LOG(DFATAL) << "Bad regexp op " << re->op();
|
|
536
534
|
info = EmptyString();
|
|
535
|
+
LOG(DFATAL) << "Bad regexp op " << re->op();
|
|
537
536
|
break;
|
|
538
537
|
|
|
539
538
|
case kRegexpNoMatch:
|