re2 1.16.0 → 1.17.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/.github/actions/{linux-alpine-node-15 → linux-alpine-node-17}/Dockerfile +1 -1
  2. package/.github/actions/linux-alpine-node-17/action.yml +7 -0
  3. package/.github/actions/{linux-alpine-node-15 → linux-alpine-node-17}/entrypoint.sh +0 -0
  4. package/.github/dependabot.yml +1 -1
  5. package/.github/workflows/build.yml +8 -8
  6. package/.github/workflows/tests.yml +1 -1
  7. package/README.md +8 -0
  8. package/lib/exec.cc +4 -1
  9. package/lib/match.cc +4 -1
  10. package/lib/replace.cc +50 -41
  11. package/lib/test.cc +4 -1
  12. package/package.json +6 -6
  13. package/tests/test_exec.js +16 -0
  14. package/tests/test_general.js +6 -0
  15. package/tests/test_replace.js +278 -227
  16. package/vendor/README +3 -1
  17. package/vendor/re2/bitstate.cc +3 -3
  18. package/vendor/re2/compile.cc +50 -34
  19. package/vendor/re2/dfa.cc +24 -21
  20. package/vendor/re2/fuzzing/re2_fuzzer.cc +96 -20
  21. package/vendor/re2/make_perl_groups.pl +1 -1
  22. package/vendor/re2/nfa.cc +5 -5
  23. package/vendor/re2/onepass.cc +2 -2
  24. package/vendor/re2/parse.cc +41 -22
  25. package/vendor/re2/perl_groups.cc +34 -34
  26. package/vendor/re2/prog.cc +188 -4
  27. package/vendor/re2/prog.h +45 -13
  28. package/vendor/re2/re2.cc +7 -12
  29. package/vendor/re2/re2.h +7 -3
  30. package/vendor/re2/regexp.cc +11 -5
  31. package/vendor/re2/regexp.h +7 -2
  32. package/vendor/re2/set.cc +3 -0
  33. package/vendor/re2/testing/backtrack.cc +3 -3
  34. package/vendor/re2/testing/compile_test.cc +45 -21
  35. package/vendor/re2/testing/dfa_test.cc +4 -4
  36. package/vendor/re2/testing/exhaustive_tester.cc +2 -2
  37. package/vendor/re2/testing/parse_test.cc +1 -0
  38. package/vendor/re2/testing/re2_test.cc +31 -16
  39. package/vendor/re2/testing/regexp_benchmark.cc +108 -121
  40. package/vendor/re2/testing/required_prefix_test.cc +78 -24
  41. package/vendor/re2/testing/search_test.cc +2 -0
  42. package/vendor/re2/testing/tester.cc +9 -9
  43. package/vendor/re2/tostring.cc +1 -1
  44. package/vendor/re2/unicode.py +1 -1
  45. package/vendor/re2/unicode_casefold.cc +25 -11
  46. package/vendor/re2/unicode_groups.cc +319 -151
  47. package/vendor/re2/walker-inl.h +3 -2
  48. package/vendor/util/mutex.h +2 -2
  49. package/.github/actions/linux-alpine-node-15/action.yml +0 -7
@@ -1,4 +1,4 @@
1
- FROM node:15-alpine
1
+ FROM node:17-alpine
2
2
 
3
3
  RUN apk add --no-cache python3 make gcc g++
4
4
 
@@ -0,0 +1,7 @@
1
+ name: 'Create a binary artifact for Node 17 on Alpine Linux'
2
+ description: 'Create a binary artifact for Node 17 on Alpine Linux using musl'
3
+ runs:
4
+ using: 'docker'
5
+ image: 'Dockerfile'
6
+ args:
7
+ - ${{inputs.node-version}}
@@ -5,7 +5,7 @@
5
5
 
6
6
  version: 2
7
7
  updates:
8
- - package-ecosystem: "nvm" # See documentation for possible values
8
+ - package-ecosystem: "npm" # See documentation for possible values
9
9
  directory: "/" # Location of package manifests
10
10
  schedule:
11
11
  interval: "weekly"
@@ -31,7 +31,7 @@ jobs:
31
31
  strategy:
32
32
  matrix:
33
33
  os: [windows-latest, macOS-latest]
34
- node-version: [12, 14, 15, 16]
34
+ node-version: [12, 14, 16, 17]
35
35
 
36
36
  steps:
37
37
  - uses: actions/checkout@v2
@@ -73,7 +73,7 @@ jobs:
73
73
 
74
74
  strategy:
75
75
  matrix:
76
- node-version: [12, 14, 15, 16]
76
+ node-version: [12, 14, 16, 17]
77
77
 
78
78
  steps:
79
79
  - uses: actions/checkout@v2
@@ -152,8 +152,8 @@ jobs:
152
152
  env:
153
153
  GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
154
154
 
155
- build-linux-alpine-node-15:
156
- name: Node.js 15 on Alpine Linux
155
+ build-linux-alpine-node-16:
156
+ name: Node.js 16 on Alpine Linux
157
157
  needs: create-release
158
158
  runs-on: ubuntu-latest
159
159
  continue-on-error: true
@@ -175,12 +175,12 @@ jobs:
175
175
  Linux-Alpine-node-
176
176
  Linux-Alpine-
177
177
  - name: Install, test, and create artifact
178
- uses: ./.github/actions/linux-alpine-node-15/
178
+ uses: ./.github/actions/linux-alpine-node-16/
179
179
  env:
180
180
  GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
181
181
 
182
- build-linux-alpine-node-16:
183
- name: Node.js 16 on Alpine Linux
182
+ build-linux-alpine-node-17:
183
+ name: Node.js 17 on Alpine Linux
184
184
  needs: create-release
185
185
  runs-on: ubuntu-latest
186
186
  continue-on-error: true
@@ -202,6 +202,6 @@ jobs:
202
202
  Linux-Alpine-node-
203
203
  Linux-Alpine-
204
204
  - name: Install, test, and create artifact
205
- uses: ./.github/actions/linux-alpine-node-16/
205
+ uses: ./.github/actions/linux-alpine-node-17/
206
206
  env:
207
207
  GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
@@ -14,7 +14,7 @@ jobs:
14
14
  strategy:
15
15
  matrix:
16
16
  os: [ubuntu-latest, windows-latest, macOS-latest]
17
- node-version: [12, 14, 15, 16]
17
+ node-version: [12, 14, 16, 17]
18
18
 
19
19
  steps:
20
20
  - uses: actions/checkout@v2
package/README.md CHANGED
@@ -13,6 +13,10 @@ at his [Implementing Regular Expressions](http://swtch.com/~rsc/regexp/) page.
13
13
  (see [Syntax](https://github.com/google/re2/wiki/Syntax)),
14
14
  but it lacks two features: backreferences and lookahead assertions. See below for more details.
15
15
 
16
+ `RE2` always works in the [Unicode mode](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode), which means that all matches that use character codes are interpreted as Unicode code points, not as binary values of UTF-16.
17
+ See `RE2.unicodeWarningLevel` below for more details.
18
+
19
+
16
20
  `RE2` object emulates standard `RegExp` making it a practical drop-in replacement in most cases.
17
21
  `RE2` is extended to provide `String`-based regular expression methods as well. To help to convert
18
22
  `RegExp` objects to `RE2` its constructor can take `RegExp` directly honoring all properties.
@@ -343,6 +347,10 @@ console.log('re2_res : ' + re2_res); // prints: re2_res : abc,a,b,c
343
347
 
344
348
  ## Release history
345
349
 
350
+ - 1.17.3 *Fixed bug with zero-length replacements.*
351
+ - 1.17.2 *Added support for the enhanced local mirroring by updating [install-artifact-from-github](https://github.com/uhop/install-artifact-from-github).*
352
+ - 1.17.1 *Fix for `lastIndex` for U+10000 - U+10FFFF UTF characters. Thx, [omg](https://github.com/omg).*
353
+ - 1.17.0 *Updated GYP, added support for Node 17, updated deps.*
346
354
  - 1.16.0 *Updated the compiler (thx, [Sergei Dyshel](https://github.com/sergei-dyshel)), updated GYP, removed support for Node 10, added support for Node 16, updated TS bindings (thx, [BannerBomb](https://github.com/BannerBomb)).*
347
355
  - 1.15.9 *Updated deps.*
348
356
  - 1.15.8 *Updated deps.*
package/lib/exec.cc CHANGED
@@ -50,7 +50,10 @@ NAN_METHOD(WrappedRE2::Exec)
50
50
  }
51
51
  for (size_t n = re2->lastIndex; n; --n)
52
52
  {
53
- lastIndex += getUtf8CharSize(str.data[lastIndex]);
53
+ size_t s = getUtf8CharSize(str.data[lastIndex]);
54
+ lastIndex += s;
55
+ if (s == 4 && n >= 2) --n; // this utf8 character will take two utf16 characters
56
+ // the decrement above is protected to avoid an overflow of an unsigned integer
54
57
  }
55
58
  }
56
59
  }
package/lib/match.cc CHANGED
@@ -59,7 +59,10 @@ NAN_METHOD(WrappedRE2::Match)
59
59
  {
60
60
  for (size_t n = re2->lastIndex; n; --n)
61
61
  {
62
- lastIndex += getUtf8CharSize(a.data[lastIndex]);
62
+ size_t s = getUtf8CharSize(a.data[lastIndex]);
63
+ lastIndex += s;
64
+ if (s == 4 && n >= 2) --n; // this utf8 character will take two utf16 characters
65
+ // the decrement above is protected to avoid an overflow of an unsigned integer
63
66
  }
64
67
  anchor = RE2::ANCHOR_START;
65
68
  }
package/lib/replace.cc CHANGED
@@ -226,7 +226,12 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
226
226
  {
227
227
  for (size_t n = re2->lastIndex; n; --n)
228
228
  {
229
- lastIndex += getUtf8CharSize(data[lastIndex]);
229
+ size_t s = getUtf8CharSize(data[lastIndex]);
230
+ lastIndex += s;
231
+ if (s == 4 && n >= 2) {
232
+ --n; // this utf8 character will take two utf16 characters
233
+ }
234
+ // the decrement above is protected to avoid an overflow of an unsigned integer
230
235
  }
231
236
  }
232
237
  }
@@ -242,28 +247,30 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
242
247
  while (lastIndex <= size && re2->regexp.Match(str, lastIndex, size, anchor, &groups[0], groups.size()))
243
248
  {
244
249
  noMatch = false;
250
+ auto offset = match.data() - data;
245
251
  if (!re2->global && re2->sticky)
246
252
  {
247
- re2->lastIndex += replacee.isBuffer ? match.data() - data + match.size() - lastIndex : getUtf16Length(data + lastIndex, match.data() + match.size());
253
+ re2->lastIndex += replacee.isBuffer ? offset + match.size() - lastIndex : getUtf16Length(data + lastIndex, match.data() + match.size());
254
+ }
255
+ if (match.data() == data || offset > static_cast<long>(lastIndex))
256
+ {
257
+ result += std::string(data + lastIndex, offset - lastIndex);
248
258
  }
259
+ result += replace(replacer, replacer_size, groups, str, namedGroups);
249
260
  if (match.size())
250
261
  {
251
- if (match.data() == data || match.data() - data > static_cast<long>(lastIndex))
252
- {
253
- result += std::string(data + lastIndex, match.data() - data - lastIndex);
254
- }
255
- result += replace(replacer, replacer_size, groups, str, namedGroups);
256
- lastIndex = match.data() - data + match.size();
262
+ lastIndex = offset + match.size();
263
+ }
264
+ else if (offset < size)
265
+ {
266
+ auto sym_size = getUtf8CharSize(data[offset]);
267
+ result.append(data + offset, sym_size);
268
+ lastIndex = offset + sym_size;
257
269
  }
258
270
  else
259
271
  {
260
- result += replace(replacer, replacer_size, groups, str, namedGroups);
261
- size_t sym_size = getUtf8CharSize(data[lastIndex]);
262
- if (lastIndex < size)
263
- {
264
- result.append(data + lastIndex, sym_size);
265
- }
266
- lastIndex += sym_size;
272
+ lastIndex = size;
273
+ break;
267
274
  }
268
275
  if (!re2->global)
269
276
  {
@@ -292,7 +299,7 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
292
299
 
293
300
  inline Nan::Maybe<std::string> replace(const Nan::Callback *replacer, const std::vector<re2::StringPiece> &groups, const re2::StringPiece &str, const v8::Local<v8::Value> &input, bool useBuffers, const std::map<std::string, int> &namedGroups)
294
301
  {
295
- std::vector<v8::Local<v8::Value>> argv;
302
+ std::vector<v8::Local<v8::Value> > argv;
296
303
 
297
304
  auto context = Nan::GetCurrentContext();
298
305
 
@@ -372,7 +379,12 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
372
379
  {
373
380
  for (size_t n = re2->lastIndex; n; --n)
374
381
  {
375
- lastIndex += getUtf8CharSize(data[lastIndex]);
382
+ size_t s = getUtf8CharSize(data[lastIndex]);
383
+ lastIndex += s;
384
+ if (s == 4 && n >= 2) {
385
+ --n; // this utf8 character will take two utf16 characters
386
+ }
387
+ // the decrement above is protected to avoid an overflow of an unsigned integer
376
388
  }
377
389
  }
378
390
  }
@@ -390,38 +402,35 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
390
402
  while (lastIndex <= size && re2->regexp.Match(str, lastIndex, size, anchor, &groups[0], groups.size()))
391
403
  {
392
404
  noMatch = false;
405
+ auto offset = match.data() - data;
393
406
  if (!re2->global && re2->sticky)
394
407
  {
395
- re2->lastIndex += replacee.isBuffer ? match.data() - data + match.size() - lastIndex : getUtf16Length(data + lastIndex, match.data() + match.size());
408
+ re2->lastIndex += replacee.isBuffer ? offset + match.size() - lastIndex : getUtf16Length(data + lastIndex, match.data() + match.size());
409
+ }
410
+ if (match.data() == data || offset > static_cast<long>(lastIndex))
411
+ {
412
+ result += std::string(data + lastIndex, offset - lastIndex);
396
413
  }
414
+ const auto part = replace(replacer, groups, str, input, useBuffers, namedGroups);
415
+ if (part.IsNothing())
416
+ {
417
+ return part;
418
+ }
419
+ result += part.FromJust();
397
420
  if (match.size())
398
421
  {
399
- if (match.data() == data || match.data() - data > static_cast<long>(lastIndex))
400
- {
401
- result += std::string(data + lastIndex, match.data() - data - lastIndex);
402
- }
403
- const auto part = replace(replacer, groups, str, input, useBuffers, namedGroups);
404
- if (part.IsNothing())
405
- {
406
- return part;
407
- }
408
- result += part.FromJust();
409
- lastIndex = match.data() - data + match.size();
422
+ lastIndex = offset + match.size();
423
+ }
424
+ else if (offset < size)
425
+ {
426
+ auto sym_size = getUtf8CharSize(data[offset]);
427
+ result.append(data + offset, sym_size);
428
+ lastIndex = offset + sym_size;
410
429
  }
411
430
  else
412
431
  {
413
- const auto part = replace(replacer, groups, str, input, useBuffers, namedGroups);
414
- if (part.IsNothing())
415
- {
416
- return part;
417
- }
418
- result += part.FromJust();
419
- size_t sym_size = getUtf8CharSize(data[lastIndex]);
420
- if (lastIndex < size)
421
- {
422
- result.append(data + lastIndex, sym_size);
423
- }
424
- lastIndex += sym_size;
432
+ lastIndex = size;
433
+ break;
425
434
  }
426
435
  if (!re2->global)
427
436
  {
package/lib/test.cc CHANGED
@@ -50,7 +50,10 @@ NAN_METHOD(WrappedRE2::Test)
50
50
  }
51
51
  for (size_t n = re2->lastIndex; n; --n)
52
52
  {
53
- lastIndex += getUtf8CharSize(str.data[lastIndex]);
53
+ size_t s = getUtf8CharSize(str.data[lastIndex]);
54
+ lastIndex += s;
55
+ if (s == 4 && n >= 2) --n; // this utf8 character will take two utf16 characters
56
+ // the decrement above is protected to avoid an overflow of an unsigned integer
54
57
  }
55
58
  }
56
59
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "re2",
3
- "version": "1.16.0",
3
+ "version": "1.17.3",
4
4
  "description": "Bindings for RE2: fast, safe alternative to backtracking regular expression engines.",
5
5
  "homepage": "https://github.com/uhop/node-re2",
6
6
  "bugs": "https://github.com/uhop/node-re2/issues",
@@ -10,9 +10,9 @@
10
10
  "test": "tests"
11
11
  },
12
12
  "dependencies": {
13
- "install-artifact-from-github": "^1.2.0",
14
- "nan": "^2.14.2",
15
- "node-gyp": "^8.0.0"
13
+ "install-artifact-from-github": "^1.3.0",
14
+ "nan": "^2.15.0",
15
+ "node-gyp": "^8.4.1"
16
16
  },
17
17
  "devDependencies": {
18
18
  "heya-unit": "^0.3.0"
@@ -20,7 +20,7 @@
20
20
  "scripts": {
21
21
  "test": "node tests/tests.js",
22
22
  "save-to-github": "save-to-github-cache --artifact build/Release/re2.node",
23
- "install": "install-from-cache --artifact build/Release/re2.node --host-var RE2_DOWNLOAD_MIRROR || npm run rebuild",
23
+ "install": "install-from-cache --artifact build/Release/re2.node --host-var RE2_DOWNLOAD_MIRROR --skip-path-var RE2_DOWNLOAD_SKIP_PATH --skip-ver-var RE2_DOWNLOAD_SKIP_VER || npm run rebuild",
24
24
  "verify-build": "node scripts/verify-build.js",
25
25
  "rebuild": "node-gyp rebuild"
26
26
  },
@@ -35,6 +35,6 @@
35
35
  "text processing",
36
36
  "PCRE alternative"
37
37
  ],
38
- "author": "Eugene Lazutkin <eugene.lazutkin@gmail.com> (http://lazutkin.com/)",
38
+ "author": "Eugene Lazutkin <eugene.lazutkin@gmail.com> (https://lazutkin.com/)",
39
39
  "license": "BSD-3-Clause"
40
40
  }
@@ -284,6 +284,22 @@ unit.add(module, [
284
284
  eval(t.TEST("re2.lastIndex === 6"));
285
285
  },
286
286
 
287
+ function test_execSupplemental(t) {
288
+ "use strict";
289
+
290
+ var re = new RE2("\\w+", "g");
291
+ var testString = "🤡🤡🤡 Hello clown world!";
292
+
293
+ var result = re.exec(testString);
294
+ eval(t.TEST("t.unify(result, ['Hello'])"));
295
+
296
+ result = re.exec(testString);
297
+ eval(t.TEST("t.unify(result, ['clown'])"));
298
+
299
+ result = re.exec(testString);
300
+ eval(t.TEST("t.unify(result, ['world'])"));
301
+ },
302
+
287
303
  // Multiline test
288
304
 
289
305
  function test_execMultiline(t) {
@@ -209,6 +209,12 @@ unit.add(module, [
209
209
  eval(t.TEST("s3.length === 1"));
210
210
  eval(t.TEST("RE2.getUtf8Length(s3) === 3"));
211
211
 
212
+ var s4 = "🤡";
213
+
214
+ eval(t.TEST("s4.length === 2"));
215
+ eval(t.TEST("RE2.getUtf8Length(s4) === 4"));
216
+ eval(t.TEST("RE2.getUtf16Length(Buffer.from(s4, 'utf8')) === s4.length"));
217
+
212
218
  var b3 = new Buffer([0xF0]);
213
219
 
214
220
  eval(t.TEST("b3.length === 1"));