re2 1.16.0 → 1.17.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/actions/{linux-alpine-node-15 → linux-alpine-node-17}/Dockerfile +1 -1
- package/.github/actions/linux-alpine-node-17/action.yml +7 -0
- package/.github/actions/{linux-alpine-node-15 → linux-alpine-node-17}/entrypoint.sh +0 -0
- package/.github/dependabot.yml +1 -1
- package/.github/workflows/build.yml +8 -8
- package/.github/workflows/tests.yml +1 -1
- package/README.md +8 -0
- package/lib/exec.cc +4 -1
- package/lib/match.cc +4 -1
- package/lib/replace.cc +50 -41
- package/lib/test.cc +4 -1
- package/package.json +6 -6
- package/tests/test_exec.js +16 -0
- package/tests/test_general.js +6 -0
- package/tests/test_replace.js +278 -227
- package/vendor/README +3 -1
- package/vendor/re2/bitstate.cc +3 -3
- package/vendor/re2/compile.cc +50 -34
- package/vendor/re2/dfa.cc +24 -21
- package/vendor/re2/fuzzing/re2_fuzzer.cc +96 -20
- package/vendor/re2/make_perl_groups.pl +1 -1
- package/vendor/re2/nfa.cc +5 -5
- package/vendor/re2/onepass.cc +2 -2
- package/vendor/re2/parse.cc +41 -22
- package/vendor/re2/perl_groups.cc +34 -34
- package/vendor/re2/prog.cc +188 -4
- package/vendor/re2/prog.h +45 -13
- package/vendor/re2/re2.cc +7 -12
- package/vendor/re2/re2.h +7 -3
- package/vendor/re2/regexp.cc +11 -5
- package/vendor/re2/regexp.h +7 -2
- package/vendor/re2/set.cc +3 -0
- package/vendor/re2/testing/backtrack.cc +3 -3
- package/vendor/re2/testing/compile_test.cc +45 -21
- package/vendor/re2/testing/dfa_test.cc +4 -4
- package/vendor/re2/testing/exhaustive_tester.cc +2 -2
- package/vendor/re2/testing/parse_test.cc +1 -0
- package/vendor/re2/testing/re2_test.cc +31 -16
- package/vendor/re2/testing/regexp_benchmark.cc +108 -121
- package/vendor/re2/testing/required_prefix_test.cc +78 -24
- package/vendor/re2/testing/search_test.cc +2 -0
- package/vendor/re2/testing/tester.cc +9 -9
- package/vendor/re2/tostring.cc +1 -1
- package/vendor/re2/unicode.py +1 -1
- package/vendor/re2/unicode_casefold.cc +25 -11
- package/vendor/re2/unicode_groups.cc +319 -151
- package/vendor/re2/walker-inl.h +3 -2
- package/vendor/util/mutex.h +2 -2
- package/.github/actions/linux-alpine-node-15/action.yml +0 -7
|
File without changes
|
package/.github/dependabot.yml
CHANGED
|
@@ -31,7 +31,7 @@ jobs:
|
|
|
31
31
|
strategy:
|
|
32
32
|
matrix:
|
|
33
33
|
os: [windows-latest, macOS-latest]
|
|
34
|
-
node-version: [12, 14,
|
|
34
|
+
node-version: [12, 14, 16, 17]
|
|
35
35
|
|
|
36
36
|
steps:
|
|
37
37
|
- uses: actions/checkout@v2
|
|
@@ -73,7 +73,7 @@ jobs:
|
|
|
73
73
|
|
|
74
74
|
strategy:
|
|
75
75
|
matrix:
|
|
76
|
-
node-version: [12, 14,
|
|
76
|
+
node-version: [12, 14, 16, 17]
|
|
77
77
|
|
|
78
78
|
steps:
|
|
79
79
|
- uses: actions/checkout@v2
|
|
@@ -152,8 +152,8 @@ jobs:
|
|
|
152
152
|
env:
|
|
153
153
|
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
|
154
154
|
|
|
155
|
-
build-linux-alpine-node-
|
|
156
|
-
name: Node.js
|
|
155
|
+
build-linux-alpine-node-16:
|
|
156
|
+
name: Node.js 16 on Alpine Linux
|
|
157
157
|
needs: create-release
|
|
158
158
|
runs-on: ubuntu-latest
|
|
159
159
|
continue-on-error: true
|
|
@@ -175,12 +175,12 @@ jobs:
|
|
|
175
175
|
Linux-Alpine-node-
|
|
176
176
|
Linux-Alpine-
|
|
177
177
|
- name: Install, test, and create artifact
|
|
178
|
-
uses: ./.github/actions/linux-alpine-node-
|
|
178
|
+
uses: ./.github/actions/linux-alpine-node-16/
|
|
179
179
|
env:
|
|
180
180
|
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
|
181
181
|
|
|
182
|
-
build-linux-alpine-node-
|
|
183
|
-
name: Node.js
|
|
182
|
+
build-linux-alpine-node-17:
|
|
183
|
+
name: Node.js 17 on Alpine Linux
|
|
184
184
|
needs: create-release
|
|
185
185
|
runs-on: ubuntu-latest
|
|
186
186
|
continue-on-error: true
|
|
@@ -202,6 +202,6 @@ jobs:
|
|
|
202
202
|
Linux-Alpine-node-
|
|
203
203
|
Linux-Alpine-
|
|
204
204
|
- name: Install, test, and create artifact
|
|
205
|
-
uses: ./.github/actions/linux-alpine-node-
|
|
205
|
+
uses: ./.github/actions/linux-alpine-node-17/
|
|
206
206
|
env:
|
|
207
207
|
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
package/README.md
CHANGED
|
@@ -13,6 +13,10 @@ at his [Implementing Regular Expressions](http://swtch.com/~rsc/regexp/) page.
|
|
|
13
13
|
(see [Syntax](https://github.com/google/re2/wiki/Syntax)),
|
|
14
14
|
but it lacks two features: backreferences and lookahead assertions. See below for more details.
|
|
15
15
|
|
|
16
|
+
`RE2` always works in the [Unicode mode](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode), which means that all matches that use character codes are interpreted as Unicode code points, not as binary values of UTF-16.
|
|
17
|
+
See `RE2.unicodeWarningLevel` below for more details.
|
|
18
|
+
|
|
19
|
+
|
|
16
20
|
`RE2` object emulates standard `RegExp` making it a practical drop-in replacement in most cases.
|
|
17
21
|
`RE2` is extended to provide `String`-based regular expression methods as well. To help to convert
|
|
18
22
|
`RegExp` objects to `RE2` its constructor can take `RegExp` directly honoring all properties.
|
|
@@ -343,6 +347,10 @@ console.log('re2_res : ' + re2_res); // prints: re2_res : abc,a,b,c
|
|
|
343
347
|
|
|
344
348
|
## Release history
|
|
345
349
|
|
|
350
|
+
- 1.17.3 *Fixed bug with zero-length replacements.*
|
|
351
|
+
- 1.17.2 *Added support for the enhanced local mirroring by updating [install-artifact-from-github](https://github.com/uhop/install-artifact-from-github).*
|
|
352
|
+
- 1.17.1 *Fix for `lastIndex` for U+10000 - U+10FFFF UTF characters. Thx, [omg](https://github.com/omg).*
|
|
353
|
+
- 1.17.0 *Updated GYP, added support for Node 17, updated deps.*
|
|
346
354
|
- 1.16.0 *Updated the compiler (thx, [Sergei Dyshel](https://github.com/sergei-dyshel)), updated GYP, removed support for Node 10, added support for Node 16, updated TS bindings (thx, [BannerBomb](https://github.com/BannerBomb)).*
|
|
347
355
|
- 1.15.9 *Updated deps.*
|
|
348
356
|
- 1.15.8 *Updated deps.*
|
package/lib/exec.cc
CHANGED
|
@@ -50,7 +50,10 @@ NAN_METHOD(WrappedRE2::Exec)
|
|
|
50
50
|
}
|
|
51
51
|
for (size_t n = re2->lastIndex; n; --n)
|
|
52
52
|
{
|
|
53
|
-
|
|
53
|
+
size_t s = getUtf8CharSize(str.data[lastIndex]);
|
|
54
|
+
lastIndex += s;
|
|
55
|
+
if (s == 4 && n >= 2) --n; // this utf8 character will take two utf16 characters
|
|
56
|
+
// the decrement above is protected to avoid an overflow of an unsigned integer
|
|
54
57
|
}
|
|
55
58
|
}
|
|
56
59
|
}
|
package/lib/match.cc
CHANGED
|
@@ -59,7 +59,10 @@ NAN_METHOD(WrappedRE2::Match)
|
|
|
59
59
|
{
|
|
60
60
|
for (size_t n = re2->lastIndex; n; --n)
|
|
61
61
|
{
|
|
62
|
-
|
|
62
|
+
size_t s = getUtf8CharSize(a.data[lastIndex]);
|
|
63
|
+
lastIndex += s;
|
|
64
|
+
if (s == 4 && n >= 2) --n; // this utf8 character will take two utf16 characters
|
|
65
|
+
// the decrement above is protected to avoid an overflow of an unsigned integer
|
|
63
66
|
}
|
|
64
67
|
anchor = RE2::ANCHOR_START;
|
|
65
68
|
}
|
package/lib/replace.cc
CHANGED
|
@@ -226,7 +226,12 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
|
|
|
226
226
|
{
|
|
227
227
|
for (size_t n = re2->lastIndex; n; --n)
|
|
228
228
|
{
|
|
229
|
-
|
|
229
|
+
size_t s = getUtf8CharSize(data[lastIndex]);
|
|
230
|
+
lastIndex += s;
|
|
231
|
+
if (s == 4 && n >= 2) {
|
|
232
|
+
--n; // this utf8 character will take two utf16 characters
|
|
233
|
+
}
|
|
234
|
+
// the decrement above is protected to avoid an overflow of an unsigned integer
|
|
230
235
|
}
|
|
231
236
|
}
|
|
232
237
|
}
|
|
@@ -242,28 +247,30 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
|
|
|
242
247
|
while (lastIndex <= size && re2->regexp.Match(str, lastIndex, size, anchor, &groups[0], groups.size()))
|
|
243
248
|
{
|
|
244
249
|
noMatch = false;
|
|
250
|
+
auto offset = match.data() - data;
|
|
245
251
|
if (!re2->global && re2->sticky)
|
|
246
252
|
{
|
|
247
|
-
re2->lastIndex += replacee.isBuffer ?
|
|
253
|
+
re2->lastIndex += replacee.isBuffer ? offset + match.size() - lastIndex : getUtf16Length(data + lastIndex, match.data() + match.size());
|
|
254
|
+
}
|
|
255
|
+
if (match.data() == data || offset > static_cast<long>(lastIndex))
|
|
256
|
+
{
|
|
257
|
+
result += std::string(data + lastIndex, offset - lastIndex);
|
|
248
258
|
}
|
|
259
|
+
result += replace(replacer, replacer_size, groups, str, namedGroups);
|
|
249
260
|
if (match.size())
|
|
250
261
|
{
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
262
|
+
lastIndex = offset + match.size();
|
|
263
|
+
}
|
|
264
|
+
else if (offset < size)
|
|
265
|
+
{
|
|
266
|
+
auto sym_size = getUtf8CharSize(data[offset]);
|
|
267
|
+
result.append(data + offset, sym_size);
|
|
268
|
+
lastIndex = offset + sym_size;
|
|
257
269
|
}
|
|
258
270
|
else
|
|
259
271
|
{
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
if (lastIndex < size)
|
|
263
|
-
{
|
|
264
|
-
result.append(data + lastIndex, sym_size);
|
|
265
|
-
}
|
|
266
|
-
lastIndex += sym_size;
|
|
272
|
+
lastIndex = size;
|
|
273
|
+
break;
|
|
267
274
|
}
|
|
268
275
|
if (!re2->global)
|
|
269
276
|
{
|
|
@@ -292,7 +299,7 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
|
|
|
292
299
|
|
|
293
300
|
inline Nan::Maybe<std::string> replace(const Nan::Callback *replacer, const std::vector<re2::StringPiece> &groups, const re2::StringPiece &str, const v8::Local<v8::Value> &input, bool useBuffers, const std::map<std::string, int> &namedGroups)
|
|
294
301
|
{
|
|
295
|
-
std::vector<v8::Local<v8::Value
|
|
302
|
+
std::vector<v8::Local<v8::Value> > argv;
|
|
296
303
|
|
|
297
304
|
auto context = Nan::GetCurrentContext();
|
|
298
305
|
|
|
@@ -372,7 +379,12 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
|
|
|
372
379
|
{
|
|
373
380
|
for (size_t n = re2->lastIndex; n; --n)
|
|
374
381
|
{
|
|
375
|
-
|
|
382
|
+
size_t s = getUtf8CharSize(data[lastIndex]);
|
|
383
|
+
lastIndex += s;
|
|
384
|
+
if (s == 4 && n >= 2) {
|
|
385
|
+
--n; // this utf8 character will take two utf16 characters
|
|
386
|
+
}
|
|
387
|
+
// the decrement above is protected to avoid an overflow of an unsigned integer
|
|
376
388
|
}
|
|
377
389
|
}
|
|
378
390
|
}
|
|
@@ -390,38 +402,35 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
|
|
|
390
402
|
while (lastIndex <= size && re2->regexp.Match(str, lastIndex, size, anchor, &groups[0], groups.size()))
|
|
391
403
|
{
|
|
392
404
|
noMatch = false;
|
|
405
|
+
auto offset = match.data() - data;
|
|
393
406
|
if (!re2->global && re2->sticky)
|
|
394
407
|
{
|
|
395
|
-
re2->lastIndex += replacee.isBuffer ?
|
|
408
|
+
re2->lastIndex += replacee.isBuffer ? offset + match.size() - lastIndex : getUtf16Length(data + lastIndex, match.data() + match.size());
|
|
409
|
+
}
|
|
410
|
+
if (match.data() == data || offset > static_cast<long>(lastIndex))
|
|
411
|
+
{
|
|
412
|
+
result += std::string(data + lastIndex, offset - lastIndex);
|
|
396
413
|
}
|
|
414
|
+
const auto part = replace(replacer, groups, str, input, useBuffers, namedGroups);
|
|
415
|
+
if (part.IsNothing())
|
|
416
|
+
{
|
|
417
|
+
return part;
|
|
418
|
+
}
|
|
419
|
+
result += part.FromJust();
|
|
397
420
|
if (match.size())
|
|
398
421
|
{
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
return part;
|
|
407
|
-
}
|
|
408
|
-
result += part.FromJust();
|
|
409
|
-
lastIndex = match.data() - data + match.size();
|
|
422
|
+
lastIndex = offset + match.size();
|
|
423
|
+
}
|
|
424
|
+
else if (offset < size)
|
|
425
|
+
{
|
|
426
|
+
auto sym_size = getUtf8CharSize(data[offset]);
|
|
427
|
+
result.append(data + offset, sym_size);
|
|
428
|
+
lastIndex = offset + sym_size;
|
|
410
429
|
}
|
|
411
430
|
else
|
|
412
431
|
{
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
{
|
|
416
|
-
return part;
|
|
417
|
-
}
|
|
418
|
-
result += part.FromJust();
|
|
419
|
-
size_t sym_size = getUtf8CharSize(data[lastIndex]);
|
|
420
|
-
if (lastIndex < size)
|
|
421
|
-
{
|
|
422
|
-
result.append(data + lastIndex, sym_size);
|
|
423
|
-
}
|
|
424
|
-
lastIndex += sym_size;
|
|
432
|
+
lastIndex = size;
|
|
433
|
+
break;
|
|
425
434
|
}
|
|
426
435
|
if (!re2->global)
|
|
427
436
|
{
|
package/lib/test.cc
CHANGED
|
@@ -50,7 +50,10 @@ NAN_METHOD(WrappedRE2::Test)
|
|
|
50
50
|
}
|
|
51
51
|
for (size_t n = re2->lastIndex; n; --n)
|
|
52
52
|
{
|
|
53
|
-
|
|
53
|
+
size_t s = getUtf8CharSize(str.data[lastIndex]);
|
|
54
|
+
lastIndex += s;
|
|
55
|
+
if (s == 4 && n >= 2) --n; // this utf8 character will take two utf16 characters
|
|
56
|
+
// the decrement above is protected to avoid an overflow of an unsigned integer
|
|
54
57
|
}
|
|
55
58
|
}
|
|
56
59
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "re2",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.17.3",
|
|
4
4
|
"description": "Bindings for RE2: fast, safe alternative to backtracking regular expression engines.",
|
|
5
5
|
"homepage": "https://github.com/uhop/node-re2",
|
|
6
6
|
"bugs": "https://github.com/uhop/node-re2/issues",
|
|
@@ -10,9 +10,9 @@
|
|
|
10
10
|
"test": "tests"
|
|
11
11
|
},
|
|
12
12
|
"dependencies": {
|
|
13
|
-
"install-artifact-from-github": "^1.
|
|
14
|
-
"nan": "^2.
|
|
15
|
-
"node-gyp": "^8.
|
|
13
|
+
"install-artifact-from-github": "^1.3.0",
|
|
14
|
+
"nan": "^2.15.0",
|
|
15
|
+
"node-gyp": "^8.4.1"
|
|
16
16
|
},
|
|
17
17
|
"devDependencies": {
|
|
18
18
|
"heya-unit": "^0.3.0"
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"scripts": {
|
|
21
21
|
"test": "node tests/tests.js",
|
|
22
22
|
"save-to-github": "save-to-github-cache --artifact build/Release/re2.node",
|
|
23
|
-
"install": "install-from-cache --artifact build/Release/re2.node --host-var RE2_DOWNLOAD_MIRROR || npm run rebuild",
|
|
23
|
+
"install": "install-from-cache --artifact build/Release/re2.node --host-var RE2_DOWNLOAD_MIRROR --skip-path-var RE2_DOWNLOAD_SKIP_PATH --skip-ver-var RE2_DOWNLOAD_SKIP_VER || npm run rebuild",
|
|
24
24
|
"verify-build": "node scripts/verify-build.js",
|
|
25
25
|
"rebuild": "node-gyp rebuild"
|
|
26
26
|
},
|
|
@@ -35,6 +35,6 @@
|
|
|
35
35
|
"text processing",
|
|
36
36
|
"PCRE alternative"
|
|
37
37
|
],
|
|
38
|
-
"author": "Eugene Lazutkin <eugene.lazutkin@gmail.com> (
|
|
38
|
+
"author": "Eugene Lazutkin <eugene.lazutkin@gmail.com> (https://lazutkin.com/)",
|
|
39
39
|
"license": "BSD-3-Clause"
|
|
40
40
|
}
|
package/tests/test_exec.js
CHANGED
|
@@ -284,6 +284,22 @@ unit.add(module, [
|
|
|
284
284
|
eval(t.TEST("re2.lastIndex === 6"));
|
|
285
285
|
},
|
|
286
286
|
|
|
287
|
+
function test_execSupplemental(t) {
|
|
288
|
+
"use strict";
|
|
289
|
+
|
|
290
|
+
var re = new RE2("\\w+", "g");
|
|
291
|
+
var testString = "🤡🤡🤡 Hello clown world!";
|
|
292
|
+
|
|
293
|
+
var result = re.exec(testString);
|
|
294
|
+
eval(t.TEST("t.unify(result, ['Hello'])"));
|
|
295
|
+
|
|
296
|
+
result = re.exec(testString);
|
|
297
|
+
eval(t.TEST("t.unify(result, ['clown'])"));
|
|
298
|
+
|
|
299
|
+
result = re.exec(testString);
|
|
300
|
+
eval(t.TEST("t.unify(result, ['world'])"));
|
|
301
|
+
},
|
|
302
|
+
|
|
287
303
|
// Multiline test
|
|
288
304
|
|
|
289
305
|
function test_execMultiline(t) {
|
package/tests/test_general.js
CHANGED
|
@@ -209,6 +209,12 @@ unit.add(module, [
|
|
|
209
209
|
eval(t.TEST("s3.length === 1"));
|
|
210
210
|
eval(t.TEST("RE2.getUtf8Length(s3) === 3"));
|
|
211
211
|
|
|
212
|
+
var s4 = "🤡";
|
|
213
|
+
|
|
214
|
+
eval(t.TEST("s4.length === 2"));
|
|
215
|
+
eval(t.TEST("RE2.getUtf8Length(s4) === 4"));
|
|
216
|
+
eval(t.TEST("RE2.getUtf16Length(Buffer.from(s4, 'utf8')) === s4.length"));
|
|
217
|
+
|
|
212
218
|
var b3 = new Buffer([0xF0]);
|
|
213
219
|
|
|
214
220
|
eval(t.TEST("b3.length === 1"));
|