re2 1.23.3 → 1.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -7,7 +7,7 @@ The text of the BSD license is reproduced below.
7
7
  The "New" BSD License:
8
8
  **********************
9
9
 
10
- Copyright (c) 2005-2025, Eugene Lazutkin
10
+ Copyright (c) 2005-2026, Eugene Lazutkin
11
11
  All rights reserved.
12
12
 
13
13
  Redistribution and use in source and binary forms, with or without
package/README.md CHANGED
@@ -5,29 +5,22 @@
5
5
 
6
6
  This project provides Node.js bindings for [RE2](https://github.com/google/re2):
7
7
  a fast, safe alternative to backtracking regular expression engines written by [Russ Cox](http://swtch.com/~rsc/) in C++.
8
- To learn more about RE2, start with an overview
9
- [Regular Expression Matching in the Wild](http://swtch.com/~rsc/regexp/regexp3.html). More resources can be found
10
- at his [Implementing Regular Expressions](http://swtch.com/~rsc/regexp/) page.
8
+ To learn more about RE2, start with [Regular Expression Matching in the Wild](http://swtch.com/~rsc/regexp/regexp3.html). More resources are on his [Implementing Regular Expressions](http://swtch.com/~rsc/regexp/) page.
11
9
 
12
-
13
- `RE2`'s regular expression language is almost a superset of what is provided by `RegExp`
10
+ `RE2`'s regular expression language is almost a superset of what `RegExp` provides
14
11
  (see [Syntax](https://github.com/google/re2/wiki/Syntax)),
15
- but it lacks two features: backreferences and lookahead assertions. See below for more details.
16
-
17
- `RE2` always works in the [Unicode mode](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode), which means that all matches that use character codes are interpret as Unicode code points, not as binary values of UTF-16.
18
- See `RE2.unicodeWarningLevel` below for more details.
12
+ but it lacks backreferences and lookahead assertions. See below for details.
19
13
 
14
+ `RE2` always works in [Unicode mode](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode) — character codes are interpreted as Unicode code points, not as binary values of UTF-16.
15
+ See `RE2.unicodeWarningLevel` below for details.
20
16
 
21
- `RE2` object emulates standard `RegExp` making it a practical drop-in replacement in most cases.
22
- `RE2` is extended to provide `String`-based regular expression methods as well. To help to convert
23
- `RegExp` objects to `RE2` its constructor can take `RegExp` directly honoring all properties.
17
+ `RE2` emulates standard `RegExp`, making it a practical drop-in replacement in most cases.
18
+ It also provides `String`-based regular expression methods. The constructor accepts `RegExp` directly, honoring all properties.
24
19
 
25
- It can work with [node.js buffers](http://nodejs.org/api/buffer.html) directly reducing overhead
26
- on recoding and copying characters, and making processing/parsing long files fast.
20
+ It can work with [Node.js Buffers](https://nodejs.org/api/buffer.html) directly, reducing overhead and making processing of long files fast.
27
21
 
28
- This project is implemented in C++ using [nan](https://github.com/nodejs/nan) for Node.js and cannot be used
29
- with non-compliant runtimes like web browsers. All documentation can be found in this README and in
30
- the [wiki](https://github.com/uhop/node-re2/wiki).
22
+ The project is a C++ addon built with [nan](https://github.com/nodejs/nan). It cannot be used in web browsers.
23
+ All documentation is in this README and in the [wiki](https://github.com/uhop/node-re2/wiki).
31
24
 
32
25
  ## Why use node-re2?
33
26
 
@@ -36,16 +29,15 @@ The built-in Node.js regular expression engine can run in exponential time with
36
29
  - "Evil input"
37
30
 
38
31
  This can lead to what is known as a [Regular Expression Denial of Service (ReDoS)](https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS).
39
- To tell if your regular expressions are vulnerable, you might try the one of these projects:
32
+ To check if your regular expressions are vulnerable, try one of these projects:
40
33
  - [rxxr2](http://www.cs.bham.ac.uk/~hxt/research/rxxr2/)
41
34
  - [safe-regex](https://github.com/substack/safe-regex)
42
35
 
43
- However, neither project is perfect.
36
+ Neither project is perfect.
44
37
 
45
- node-re2 can protect your Node.js application from ReDoS.
46
- node-re2 makes vulnerable regular expression patterns safe by evaluating them in `RE2` instead of the built-in Node.js regex engine.
38
+ node-re2 protects against ReDoS by evaluating patterns in `RE2` instead of the built-in regex engine.
47
39
 
48
- To run the bundled benchmark, use the following command (make sure that node-re2 is properly built before):
40
+ To run the bundled benchmark (make sure node-re2 is built first):
49
41
 
50
42
  ```bash
51
43
  npx nano-bench bench/bad-pattern.mjs
@@ -53,21 +45,22 @@ npx nano-bench bench/bad-pattern.mjs
53
45
 
54
46
  ## Standard features
55
47
 
56
- `RE2` object can be created just like `RegExp`:
48
+ `RE2` objects are created just like `RegExp`:
57
49
 
58
50
  * [`new RE2(pattern[, flags])`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp)
59
51
 
52
+ Supported flags: `g` (global), `i` (ignoreCase), `m` (multiline), `s` (dotAll), `u` (unicode, always on), `y` (sticky), `d` (hasIndices).
53
+
60
54
  Supported properties:
61
55
 
62
56
  * [`re2.lastIndex`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/lastIndex)
63
57
  * [`re2.global`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/global)
64
58
  * [`re2.ignoreCase`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/ignoreCase)
65
59
  * [`re2.multiline`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/multiline)
66
- * [`re2.dotAll`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/dotAll) — *since 1.17.6.*
67
- * [`re2.unicode`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode)
68
- * `RE2` engine always works in the Unicode mode. See details below.
69
- * [`re2.sticky`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/sticky) — *since 1.7.0.*
70
- * [`re2.hasIndices`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/hasIndices) — *since 1.19.0.*
60
+ * [`re2.dotAll`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/dotAll)
61
+ * [`re2.unicode`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode) — always `true`; see details below.
62
+ * [`re2.sticky`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/sticky)
63
+ * [`re2.hasIndices`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/hasIndices)
71
64
  * [`re2.source`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/source)
72
65
  * [`re2.flags`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/flags)
73
66
 
@@ -77,44 +70,43 @@ Supported methods:
77
70
  * [`re2.test(str)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/test)
78
71
  * [`re2.toString()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/toString)
79
72
 
80
- Starting with 1.6.0 following well-known symbol-based methods are supported (see [Symbols](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol)):
73
+ Well-known symbol-based methods are supported (see [Symbols](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol)):
81
74
 
82
75
  * [`re2[Symbol.match](str)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/match)
83
- * [`re2[Symbol.matchAll](str)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/matchAll) — *since 1.17.5.*
76
+ * [`re2[Symbol.matchAll](str)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/matchAll)
84
77
  * [`re2[Symbol.search](str)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/search)
85
78
  * [`re2[Symbol.replace](str, newSubStr|function)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/replace)
86
79
  * [`re2[Symbol.split](str[, limit])`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/split)
87
80
 
88
- It allows to use `RE2` instances on strings directly, just like `RegExp` instances:
81
+ This lets you use `RE2` instances on strings directly, just like `RegExp`:
89
82
 
90
83
  ```js
91
- var re = new RE2("1");
92
- "213".match(re); // [ '1', index: 1, input: '213' ]
93
- "213".search(re); // 1
94
- "213".replace(re, "+"); // 2+3
95
- "213".split(re); // [ '2', '3' ]
84
+ const re = new RE2('1');
85
+ '213'.match(re); // [ '1', index: 1, input: '213' ]
86
+ '213'.search(re); // 1
87
+ '213'.replace(re, '+'); // 2+3
88
+ '213'.split(re); // [ '2', '3' ]
96
89
 
97
- Array.from("2131".matchAll(re)); // returns a generator!
90
+ Array.from('2131'.matchAll(new RE2('1', 'g'))); // matchAll requires the g flag
98
91
  // [['1', index: 1, input: '2131'], ['1', index: 3, input: '2131']]
99
92
  ```
100
93
 
101
- Starting with 1.8.0 [named groups](https://tc39.github.io/proposal-regexp-named-groups/) are supported.
94
+ [Named groups](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Named_capturing_group) are supported.
102
95
 
103
96
  ## Extensions
104
97
 
105
98
  ### Shortcut construction
106
99
 
107
- `RE2` object can be created from a regular expression:
100
+ `RE2` can be created from a regular expression:
108
101
 
109
102
  ```js
110
- var re1 = new RE2(/ab*/ig); // from a RegExp object
111
- var re2 = new RE2(re1); // from another RE2 object
103
+ const re1 = new RE2(/ab*/ig); // from a RegExp object
104
+ const re2 = new RE2(re1); // from another RE2 object
112
105
  ```
113
106
 
114
107
  ### `String` methods
115
108
 
116
- Standard `String` defines four more methods that can use regular expressions. `RE2` provides them as methods
117
- exchanging positions of a string, and a regular expression:
109
+ `RE2` provides the standard `String` regex methods with swapped receiver and argument:
118
110
 
119
111
  * `re2.match(str)`
120
112
  * See [`str.match(regexp)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match)
@@ -125,12 +117,11 @@ exchanging positions of a string, and a regular expression:
125
117
  * `re2.split(str[, limit])`
126
118
  * See [`str.split(regexp[, limit])`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/split)
127
119
 
128
- Starting with 1.6.0, these methods added as well-known symbol-based methods to be used transparently with ES6 string/regex machinery.
120
+ These methods are also available as well-known symbol-based methods for transparent use with ES6 string/regex machinery.
129
121
 
130
122
  ### `Buffer` support
131
123
 
132
- In order to support `Buffer` directly, most methods can accept buffers instead of strings. It speeds up all operations.
133
- Following signatures are supported:
124
+ Most methods accept Buffers instead of strings for direct UTF-8 processing:
134
125
 
135
126
  * `re2.exec(buf)`
136
127
  * `re2.test(buf)`
@@ -139,18 +130,16 @@ Following signatures are supported:
139
130
  * `re2.split(buf[, limit])`
140
131
  * `re2.replace(buf, replacer)`
141
132
 
142
- Differences with their string-based versions:
133
+ Differences from string-based versions:
143
134
 
144
- * All buffers are assumed to be encoded as [UTF-8](http://en.wikipedia.org/wiki/UTF-8)
135
+ * All buffers are assumed to be encoded as [UTF-8](https://en.wikipedia.org/wiki/UTF-8)
145
136
  (ASCII is a proper subset of UTF-8).
146
- * Instead of strings they return `Buffer` objects, even in composite objects. A buffer can be converted to a string with
147
- [`buf.toString()`](http://nodejs.org/api/buffer.html#buffer_buf_tostring_encoding_start_end).
148
- * All offsets and lengths are in bytes, rather than characters (each UTF-8 character can occupy from 1 to 4 bytes).
149
- This way users can properly slice buffers without costly recalculations from characters to bytes.
137
+ * Results are `Buffer` objects, even in composite objects. Convert with
138
+ [`buf.toString()`](https://nodejs.org/api/buffer.html#buffer_buf_tostring_encoding_start_end).
139
+ * All offsets and lengths are in bytes, not characters (each UTF-8 character occupies 1–4 bytes).
140
+ This lets you slice buffers directly without costly character-to-byte recalculations.
150
141
 
151
- When `re2.replace()` is used with a replacer function, the replacer can return a buffer, or a string. But all arguments
152
- (except for an input object) will be strings, and an offset will be in characters. If you prefer to deal
153
- with buffers and byte offsets in a replacer function, set a property `useBuffers` to `true` on the function:
142
+ When `re2.replace()` is used with a replacer function, the replacer receives string arguments and character offsets by default. Set `useBuffers` to `true` on the function to receive buffers and byte offsets instead:
154
143
 
155
144
  ```js
156
145
  function strReplacer(match, offset, input) {
@@ -171,17 +160,13 @@ RE2("б").replace("абв", bufReplacer);
171
160
  // "а<= 2 bytes|в"
172
161
  ```
173
162
 
174
- This feature works for string and buffer inputs. If a buffer was used as an input, its output will be returned as
175
- a buffer too, otherwise a string will be returned.
163
+ This works for both string and buffer inputs. Buffer input produces buffer output; string input produces string output.
176
164
 
177
165
  ### `RE2.Set`
178
166
 
179
- Starting with 1.23.0, use `RE2.Set` when the same string must be tested against many patterns. It builds a single automaton
180
- for all of them and frequently beats running a large list of individual regular expressions one by one.
167
+ Use `RE2.Set` when the same string must be tested against many patterns. It builds a single automaton and frequently beats running individual regular expressions one by one.
181
168
 
182
- Sets support `test()` and `match()` methods. While `test()` can be simulated by combining patterns with `|` and using a regular expression object,
183
- `match()` is not because it returns a list of patterns that matched, which is not possible with a regular expression object.
184
- Parsing data against multiple choices is a frequent operation in the wild and `RE2.Set` is a fast way to do it.
169
+ While `test()` can be simulated by combining patterns with `|`, `match()` returns which patterns matched &mdash; something a single regular expression cannot do.
185
170
 
186
171
  * `new RE2.Set(patterns[, flagsOrOptions][, options])`
187
172
  * `patterns` is any iterable of strings, `Buffer`s, `RegExp`, or `RE2` instances; flags (if provided) apply to the whole set.
@@ -211,7 +196,7 @@ routes.sources; // ['^/users/\\d+$', '^/posts/\\d+$']
211
196
  routes.toString(); // '/^/users/\\d+$|^/posts/\\d+$/iu'
212
197
  ```
213
198
 
214
- To run the bundled benchmark, use the following command (make sure that node-re2 is properly built before):
199
+ To run the bundled benchmark (make sure node-re2 is built first):
215
200
 
216
201
  ```bash
217
202
  npx nano-bench bench/set-match.mjs
@@ -219,27 +204,20 @@ npx nano-bench bench/set-match.mjs
219
204
 
220
205
  ### Calculate length
221
206
 
222
- Two functions to calculate string sizes between
223
- [UTF-8](http://en.wikipedia.org/wiki/UTF-8) and
224
- [UTF-16](http://en.wikipedia.org/wiki/UTF-16) are exposed on `RE2`:
225
-
226
- * `RE2.getUtf8Length(str)` &mdash; calculates a buffer size in bytes to encode a UTF-16 string as
227
- a UTF-8 buffer.
228
- * `RE2.getUtf16Length(buf)` &mdash; calculates a string size in characters to encode a UTF-8 buffer as
229
- a UTF-16 string.
207
+ Two helpers convert between UTF-8 and UTF-16 sizes:
230
208
 
231
- JavaScript supports UCS-2 strings with 16-bit characters, while node.js 0.11 supports full UTF-16 as
232
- a default string.
209
+ * `RE2.getUtf8Length(str)` &mdash; byte size needed to encode a string as a UTF-8 buffer.
210
+ * `RE2.getUtf16Length(buf)` &mdash; character count needed to decode a UTF-8 buffer as a string.
233
211
 
234
212
  ### Property: `internalSource`
235
213
 
236
- Starting 1.8.0 property `source` emulates the same property of `RegExp`, meaning that it can be used to create an identical `RE2` or `RegExp` instance. Sometimes, for troubleshooting purposes, a user wants to inspect a `RE2` translated source. It is available as a read-only property called `internalSource`.
214
+ `source` emulates the standard `RegExp` property and can recreate an identical `RE2` or `RegExp` instance. To inspect the RE2-translated pattern (useful for debugging), use the read-only `internalSource` property.
237
215
 
238
216
  ### Unicode warning level
239
217
 
240
- `RE2` engine always works in the Unicode mode. In most cases either there is no difference or the Unicode mode is actually preferred. But sometimes a user wants a tight control over their regular expressions. For those cases, there is a static string property `RE2.unicodeWarningLevel`.
218
+ `RE2` always works in Unicode mode. In most cases this is either invisible or preferred. For applications that need tight control, the static property `RE2.unicodeWarningLevel` governs what happens when a non-Unicode regular expression is created.
241
219
 
242
- Regular expressions in the Unicode mode work as usual. But if a regular expression lacks the Unicode flag, it is always added silently.
220
+ If a regular expression lacks the `u` flag, it is added silently by default:
243
221
 
244
222
  ```js
245
223
  const x = /./;
@@ -248,119 +226,111 @@ const y = new RE2(x);
248
226
  y.flags; // 'u'
249
227
  ```
250
228
 
251
- In the latter case `RE2` can do following actions depending on `RE2.unicodeWarningLevel`:
229
+ Values of `RE2.unicodeWarningLevel`:
252
230
 
253
- * `'nothing'` (the default): no warnings or notifications of any kind, a regular expression will be created with `'u'` flag.
254
- * `'warnOnce'`: warns exactly once the very first time, a regular expression will be created with `'u'` flag.
255
- * Assigning this value resets an internal flag, so `RE2` will warn once again.
256
- * `'warn'`: warns every time, a regular expression will be created with `'u'` flag.
257
- * `'throw'`: throws a `SyntaxError` every time.
258
- * All other warning level values are silently ignored on asignment leaving the previous value unchanged.
231
+ * `'nothing'` (default) &mdash; silently add `u`.
232
+ * `'warnOnce'` &mdash; warn once, then silently add `u`. Assigning this value resets the one-time flag.
233
+ * `'warn'` &mdash; warn every time, still add `u`.
234
+ * `'throw'` &mdash; throw `SyntaxError`.
235
+ * Any other value is silently ignored, leaving the previous value unchanged.
259
236
 
260
- Warnings and exceptions help to audit an application for stray non-Unicode regular expressions.
237
+ Warnings and exceptions help audit an application for stray non-Unicode regular expressions.
261
238
 
262
- `RE2.unicodeWarningLevel` is a global property. Be careful manipulating it in a multi-threaded environment as it is shared between threads.
239
+ `RE2.unicodeWarningLevel` is global. Be careful in multi-threaded environments &mdash; it is shared across threads.
263
240
 
264
241
  ## How to install
265
242
 
266
- Installation:
267
-
268
243
  ```bash
269
- npm install --save re2
244
+ npm install re2
270
245
  ```
271
246
 
272
- While the project is known to work with other package managers, it is not guaranteed nor tested.
273
- For example, [yarn](https://yarnpkg.com/) is known to fail in some scenarios
274
- (see this [Wiki article](https://github.com/uhop/node-re2/wiki/Problem:-unusual-errors-with-yarn)).
247
+ The project works with other package managers but is not tested with them.
248
+ See the wiki for notes on [yarn](https://github.com/uhop/node-re2/wiki/Using-with-yarn) and [pnpm](https://github.com/uhop/node-re2/wiki/Using-with-pnpm).
275
249
 
276
250
  ### Precompiled artifacts
277
251
 
278
- When installing re2 the [install script](https://github.com/uhop/install-artifact-from-github/blob/master/bin/install-from-cache.js) attempts to download a prebuilt artifact for your system from the Github releases. The download location can be overridden by setting the `RE2_DOWNLOAD_MIRROR` environment variable as seen in the install script.
252
+ The [install script](https://github.com/uhop/install-artifact-from-github/blob/master/bin/install-from-cache.js) attempts to download a prebuilt artifact from GitHub Releases. Override the download location with the `RE2_DOWNLOAD_MIRROR` environment variable.
279
253
 
280
- If all attempts to download the prebuilt artifact for your system fails the script attempts to built re2 locally on your machine using [node-gyp](https://github.com/nodejs/node-gyp).
254
+ If the download fails, the script builds RE2 locally using [node-gyp](https://github.com/nodejs/node-gyp).
281
255
 
282
256
  ## How to use
283
257
 
284
- It is used just like a `RegExp` object.
258
+ It is used just like `RegExp`.
285
259
 
286
260
  ```js
287
- var RE2 = require("re2");
261
+ const RE2 = require('re2');
288
262
 
289
263
  // with default flags
290
- var re = new RE2("a(b*)");
291
- var result = re.exec("abbc");
292
- console.log(result[0]); // "abb"
293
- console.log(result[1]); // "bb"
264
+ let re = new RE2('a(b*)');
265
+ let result = re.exec('abbc');
266
+ console.log(result[0]); // 'abb'
267
+ console.log(result[1]); // 'bb'
294
268
 
295
- result = re.exec("aBbC");
296
- console.log(result[0]); // "a"
297
- console.log(result[1]); // ""
269
+ result = re.exec('aBbC');
270
+ console.log(result[0]); // 'a'
271
+ console.log(result[1]); // ''
298
272
 
299
273
  // with explicit flags
300
- re = new RE2("a(b*)", "i");
301
- result = re.exec("aBbC");
302
- console.log(result[0]); // "aBb"
303
- console.log(result[1]); // "Bb"
274
+ re = new RE2('a(b*)', 'i');
275
+ result = re.exec('aBbC');
276
+ console.log(result[0]); // 'aBb'
277
+ console.log(result[1]); // 'Bb'
304
278
 
305
279
  // from regular expression object
306
- var regexp = new RegExp("a(b*)", "i");
280
+ const regexp = new RegExp('a(b*)', 'i');
307
281
  re = new RE2(regexp);
308
- result = re.exec("aBbC");
309
- console.log(result[0]); // "aBb"
310
- console.log(result[1]); // "Bb"
282
+ result = re.exec('aBbC');
283
+ console.log(result[0]); // 'aBb'
284
+ console.log(result[1]); // 'Bb'
311
285
 
312
286
  // from regular expression literal
313
287
  re = new RE2(/a(b*)/i);
314
- result = re.exec("aBbC");
315
- console.log(result[0]); // "aBb"
316
- console.log(result[1]); // "Bb"
288
+ result = re.exec('aBbC');
289
+ console.log(result[0]); // 'aBb'
290
+ console.log(result[1]); // 'Bb'
317
291
 
318
292
  // from another RE2 object
319
- var rex = new RE2(re);
320
- result = rex.exec("aBbC");
321
- console.log(result[0]); // "aBb"
322
- console.log(result[1]); // "Bb"
293
+ const rex = new RE2(re);
294
+ result = rex.exec('aBbC');
295
+ console.log(result[0]); // 'aBb'
296
+ console.log(result[1]); // 'Bb'
323
297
 
324
298
  // shortcut
325
- result = new RE2("ab*").exec("abba");
299
+ result = new RE2('ab*').exec('abba');
326
300
 
327
301
  // factory
328
- result = RE2("ab*").exec("abba");
302
+ result = RE2('ab*').exec('abba');
329
303
  ```
330
304
 
331
305
  ## Limitations (things RE2 does not support)
332
306
 
333
- `RE2` consciously avoids any regular expression features that require worst-case exponential time to evaluate.
334
- These features are essentially those that describe a Context-Free Language (CFL) rather than a Regular Expression,
335
- and are extensions to the traditional regular expression language because some people don't know when enough is enough.
336
-
337
- The most noteworthy missing features are backreferences and lookahead assertions.
338
- If your application uses these features, you should continue to use `RegExp`.
339
- But since these features are fundamentally vulnerable to
307
+ `RE2` avoids any regular expression features that require worst-case exponential time to evaluate.
308
+ The most notable missing features are backreferences and lookahead assertions.
309
+ If your application uses them, you should continue to use `RegExp` &mdash;
310
+ but since they are fundamentally vulnerable to
340
311
  [ReDoS](https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS),
341
- you should strongly consider replacing them.
312
+ consider replacing them.
342
313
 
343
- `RE2` will throw a `SyntaxError` if you try to declare a regular expression using these features.
344
- If you are evaluating an externally-provided regular expression, wrap your `RE2` declarations in a try-catch block. It allows to use `RegExp`, when `RE2` misses a feature:
314
+ `RE2` throws `SyntaxError` for unsupported features.
315
+ Wrap `RE2` declarations in a try-catch to fall back to `RegExp`:
345
316
 
346
317
  ```js
347
- var re = /(a)+(b)*/;
318
+ let re = /(a)+(b)*/;
348
319
  try {
349
320
  re = new RE2(re);
350
321
  // use RE2 as a drop-in replacement
351
322
  } catch (e) {
352
- // suppress an error, and use
353
- // the original RegExp
323
+ // use the original RegExp
354
324
  }
355
- var result = re.exec(sample);
325
+ const result = re.exec(sample);
356
326
  ```
357
327
 
358
- In addition to these missing features, `RE2` also behaves somewhat differently from the built-in regular expression engine in corner cases.
328
+ `RE2` may also behave differently from the built-in engine in corner cases.
359
329
 
360
330
  ### Backreferences
361
331
 
362
- `RE2` doesn't support backreferences, which are numbered references to previously
363
- matched groups, like so: `\1`, `\2`, and so on. Example of backrefrences:
332
+ `RE2` does not support backreferences &mdash; numbered references to previously
333
+ matched groups (`\1`, `\2`, etc.). Example:
364
334
 
365
335
  ```js
366
336
  /(cat|dog)\1/.test("catcat"); // true
@@ -371,7 +341,7 @@ matched groups, like so: `\1`, `\2`, and so on. Example of backrefrences:
371
341
 
372
342
  ### Lookahead assertions
373
343
 
374
- `RE2` doesn't support lookahead assertions, which are ways to allow a matching dependent on subsequent contents.
344
+ `RE2` does not support lookahead assertions, which make a match depend on subsequent contents.
375
345
 
376
346
  ```js
377
347
  /abc(?=def)/; // match abc only if it is followed by def
@@ -380,22 +350,22 @@ matched groups, like so: `\1`, `\2`, and so on. Example of backrefrences:
380
350
 
381
351
  ### Mismatched behavior
382
352
 
383
- `RE2` and the built-in regex engines disagree a bit. Before you switch to `RE2`, verify that your regular expressions continue to work as expected. They should do so in the vast majority of cases.
353
+ `RE2` and the built-in engine may disagree in edge cases. Verify your regular expressions before switching. They should work in the vast majority of cases.
384
354
 
385
- Here is an example of a case where they may not:
355
+ Example:
386
356
 
387
357
  ```js
388
- var RE2 = require("../re2");
358
+ const RE2 = require('re2');
389
359
 
390
- var pattern = '(?:(a)|(b)|(c))+';
360
+ const pattern = '(?:(a)|(b)|(c))+';
391
361
 
392
- var built_in = new RegExp(pattern);
393
- var re2 = new RE2(pattern);
362
+ const built_in = new RegExp(pattern);
363
+ const re2 = new RE2(pattern);
394
364
 
395
- var input = 'abc';
365
+ const input = 'abc';
396
366
 
397
- var bi_res = built_in.exec(input);
398
- var re2_res = re2.exec(input);
367
+ const bi_res = built_in.exec(input);
368
+ const re2_res = re2.exec(input);
399
369
 
400
370
  console.log('bi_res: ' + bi_res); // prints: bi_res: abc,,,c
401
371
  console.log('re2_res : ' + re2_res); // prints: re2_res : abc,a,b,c
@@ -403,19 +373,19 @@ console.log('re2_res : ' + re2_res); // prints: re2_res : abc,a,b,c
403
373
 
404
374
  ### Unicode
405
375
 
406
- `RE2` always works in the Unicode mode. See `RE2.unicodeWarningLevel` above for more details on how to control warnings about this feature.
376
+ `RE2` always works in Unicode mode. See `RE2.unicodeWarningLevel` above for details.
407
377
 
408
378
  #### Unicode classes `\p{...}` and `\P{...}`
409
379
 
410
- `RE2` supports a subset of Unicode classes as defined in [RE2 Syntax](https://github.com/google/re2/wiki/Syntax). Native Google RE2 supports only short names, e.g., `L` for `Letter`, `N` for `Number`, etc. Like `RegExp`, `RE2` supports both short and long names, e.g., `Letter` for `L`, by translating them to short names.
411
-
412
- Generally, the extended form `\p{name=value}` is not supported. Only form `\p{name}` is supported.
413
- The exception is `Script` and `sc` names, e.g., `\p{Script=Latin}` and `\p{sc=Cyrillic}`.
380
+ `RE2` supports a subset of Unicode classes as defined in [RE2 Syntax](https://github.com/google/re2/wiki/Syntax). Google RE2 natively supports only short names (e.g., `L` for `Letter`). Like `RegExp`, node-re2 also accepts long names by translating them to short names.
414
381
 
382
+ Only the `\p{name}` form is supported, not `\p{name=value}` in general.
383
+ The exception is `Script` and `sc`, e.g., `\p{Script=Latin}` and `\p{sc=Cyrillic}`.
415
384
  The same applies to `\P{...}`.
416
385
 
417
386
  ## Release history
418
387
 
388
+ - 1.24.0 *Fixed multi-threaded crash in worker threads (#235). Added named import: `import {RE2} from 're2'`. Added CJS test. Updated docs and dependencies.*
419
389
  - 1.23.3 *Updated Abseil and dev dependencies.*
420
390
  - 1.23.2 *Updated dev dependencies.*
421
391
  - 1.23.1 *Updated Abseil and dev dependencies.*
package/lib/accessors.cc CHANGED
@@ -178,7 +178,7 @@ NAN_SETTER(WrappedRE2::SetLastIndex)
178
178
  }
179
179
  }
180
180
 
181
- WrappedRE2::UnicodeWarningLevels WrappedRE2::unicodeWarningLevel;
181
+ std::atomic<WrappedRE2::UnicodeWarningLevels> WrappedRE2::unicodeWarningLevel{WrappedRE2::NOTHING};
182
182
 
183
183
  NAN_GETTER(WrappedRE2::GetUnicodeWarningLevel)
184
184
  {
package/lib/addon.cc CHANGED
@@ -1,5 +1,36 @@
1
1
  #include "./wrapped_re2.h"
2
2
  #include "./wrapped_re2_set.h"
3
+ #include "./isolate_data.h"
4
+
5
+ #include <mutex>
6
+ #include <unordered_map>
7
+
8
+ static std::mutex addonDataMutex;
9
+ static std::unordered_map<v8::Isolate *, AddonData *> addonDataMap;
10
+
11
+ AddonData *getAddonData(v8::Isolate *isolate)
12
+ {
13
+ std::lock_guard<std::mutex> lock(addonDataMutex);
14
+ auto it = addonDataMap.find(isolate);
15
+ return it != addonDataMap.end() ? it->second : nullptr;
16
+ }
17
+
18
+ void setAddonData(v8::Isolate *isolate, AddonData *data)
19
+ {
20
+ std::lock_guard<std::mutex> lock(addonDataMutex);
21
+ addonDataMap[isolate] = data;
22
+ }
23
+
24
+ void deleteAddonData(v8::Isolate *isolate)
25
+ {
26
+ std::lock_guard<std::mutex> lock(addonDataMutex);
27
+ auto it = addonDataMap.find(isolate);
28
+ if (it != addonDataMap.end())
29
+ {
30
+ delete it->second;
31
+ addonDataMap.erase(it);
32
+ }
33
+ }
3
34
 
4
35
  static NAN_METHOD(GetUtf8Length)
5
36
  {
@@ -26,8 +57,7 @@ static NAN_METHOD(GetUtf16Length)
26
57
  static void cleanup(void *p)
27
58
  {
28
59
  v8::Isolate *isolate = static_cast<v8::Isolate *>(p);
29
- auto p_tpl = Nan::GetIsolateData<Nan::Persistent<v8::FunctionTemplate>>(isolate);
30
- delete p_tpl;
60
+ deleteAddonData(isolate);
31
61
  }
32
62
 
33
63
  // NAN_MODULE_INIT(WrappedRE2::Init)
@@ -42,10 +72,11 @@ v8::Local<v8::Function> WrappedRE2::Init()
42
72
  auto instanceTemplate = tpl->InstanceTemplate();
43
73
  instanceTemplate->SetInternalFieldCount(1);
44
74
 
45
- // save the template
75
+ // save the template in per-isolate storage
46
76
  auto isolate = v8::Isolate::GetCurrent();
47
- auto p_tpl = new Nan::Persistent<v8::FunctionTemplate>(tpl);
48
- Nan::SetIsolateData(isolate, p_tpl);
77
+ auto data = new AddonData();
78
+ data->re2Tpl.Reset(tpl);
79
+ setAddonData(isolate, data);
49
80
  node::AddEnvironmentCleanupHook(isolate, cleanup, isolate);
50
81
 
51
82
  // prototype
@@ -0,0 +1,12 @@
1
+ #pragma once
2
+
3
+ #include <nan.h>
4
+
5
+ struct AddonData {
6
+ Nan::Persistent<v8::FunctionTemplate> re2Tpl;
7
+ Nan::Persistent<v8::FunctionTemplate> re2SetTpl;
8
+ };
9
+
10
+ AddonData *getAddonData(v8::Isolate *isolate);
11
+ void setAddonData(v8::Isolate *isolate, AddonData *data);
12
+ void deleteAddonData(v8::Isolate *isolate);
package/lib/new.cc CHANGED
@@ -8,7 +8,7 @@
8
8
  #include <unordered_set>
9
9
  #include <vector>
10
10
 
11
- bool WrappedRE2::alreadyWarnedAboutUnicode = false;
11
+ std::atomic<bool> WrappedRE2::alreadyWarnedAboutUnicode{false};
12
12
 
13
13
  static const char *deprecationMessage = "BMP patterns aren't supported by node-re2. An implicit \"u\" flag is assumed by the RE2 constructor. In a future major version, calling the RE2 constructor without the \"u\" flag may become forbidden, or cause a different behavior. Please see https://github.com/uhop/node-re2/issues/21 for more information.";
14
14
 
@@ -40,8 +40,9 @@ NAN_METHOD(WrappedRE2::New)
40
40
  parameters[i] = info[i];
41
41
  }
42
42
  auto isolate = v8::Isolate::GetCurrent();
43
- auto p_tpl = Nan::GetIsolateData<Nan::Persistent<v8::FunctionTemplate>>(isolate);
44
- auto newObject = Nan::NewInstance(Nan::GetFunction(p_tpl->Get(isolate)).ToLocalChecked(), parameters.size(), &parameters[0]);
43
+ auto data = getAddonData(isolate);
44
+ if (!data) return;
45
+ auto newObject = Nan::NewInstance(Nan::GetFunction(data->re2Tpl.Get(isolate)).ToLocalChecked(), parameters.size(), &parameters[0]);
45
46
  if (!newObject.IsEmpty())
46
47
  {
47
48
  info.GetReturnValue().Set(newObject.ToLocalChecked());
package/lib/pattern.cc CHANGED
@@ -14,7 +14,7 @@ inline bool isUpperCaseAlpha(char ch)
14
14
 
15
15
  inline bool isHexadecimal(char ch)
16
16
  {
17
- return ('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
17
+ return ('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'F') || ('a' <= ch && ch <= 'f');
18
18
  }
19
19
 
20
20
  static std::map<std::string, std::string> unicodeClasses = {
package/lib/set.cc CHANGED
@@ -8,8 +8,6 @@
8
8
  #include <string>
9
9
  #include <vector>
10
10
 
11
- Nan::Persistent<v8::FunctionTemplate> WrappedRE2Set::constructor;
12
-
13
11
  struct SetFlags
14
12
  {
15
13
  bool global = false;
@@ -342,7 +340,10 @@ NAN_METHOD(WrappedRE2Set::New)
342
340
  {
343
341
  parameters[i] = info[i];
344
342
  }
345
- auto maybeNew = Nan::NewInstance(Nan::GetFunction(Nan::New(constructor)).ToLocalChecked(), parameters.size(), &parameters[0]);
343
+ auto isolate = context->GetIsolate();
344
+ auto addonData = getAddonData(isolate);
345
+ if (!addonData) return;
346
+ auto maybeNew = Nan::NewInstance(Nan::GetFunction(addonData->re2SetTpl.Get(isolate)).ToLocalChecked(), parameters.size(), &parameters[0]);
346
347
  if (!maybeNew.IsEmpty())
347
348
  {
348
349
  info.GetReturnValue().Set(maybeNew.ToLocalChecked());
@@ -772,6 +773,11 @@ v8::Local<v8::Function> WrappedRE2Set::Init()
772
773
  Nan::SetAccessor(instanceTemplate, Nan::New("size").ToLocalChecked(), GetSize);
773
774
  Nan::SetAccessor(instanceTemplate, Nan::New("anchor").ToLocalChecked(), GetAnchor);
774
775
 
775
- constructor.Reset(tpl);
776
+ auto isolate = v8::Isolate::GetCurrent();
777
+ auto data = getAddonData(isolate);
778
+ if (data)
779
+ {
780
+ data->re2SetTpl.Reset(tpl);
781
+ }
776
782
  return scope.Escape(Nan::GetFunction(tpl).ToLocalChecked());
777
783
  }
package/lib/split.cc CHANGED
@@ -77,7 +77,14 @@ NAN_METHOD(WrappedRE2::Split)
77
77
  for (size_t i = 0, n = std::min(pieces.size(), limit); i < n; ++i)
78
78
  {
79
79
  const auto &item = pieces[i];
80
- Nan::Set(result, i, Nan::CopyBuffer(item.data(), item.size()).ToLocalChecked());
80
+ if (item.data())
81
+ {
82
+ Nan::Set(result, i, Nan::CopyBuffer(item.data(), item.size()).ToLocalChecked());
83
+ }
84
+ else
85
+ {
86
+ Nan::Set(result, i, Nan::Undefined());
87
+ }
81
88
  }
82
89
  }
83
90
  else
@@ -85,7 +92,14 @@ NAN_METHOD(WrappedRE2::Split)
85
92
  for (size_t i = 0, n = std::min(pieces.size(), limit); i < n; ++i)
86
93
  {
87
94
  const auto &item = pieces[i];
88
- Nan::Set(result, i, Nan::New(item.data(), item.size()).ToLocalChecked());
95
+ if (item.data())
96
+ {
97
+ Nan::Set(result, i, Nan::New(item.data(), item.size()).ToLocalChecked());
98
+ }
99
+ else
100
+ {
101
+ Nan::Set(result, i, Nan::Undefined());
102
+ }
89
103
  }
90
104
  }
91
105
 
package/lib/test.cc CHANGED
@@ -27,7 +27,7 @@ NAN_METHOD(WrappedRE2::Test)
27
27
  if (!str.isValidIndex)
28
28
  {
29
29
  re2->lastIndex = 0;
30
- info.GetReturnValue().SetNull();
30
+ info.GetReturnValue().Set(false);
31
31
  return;
32
32
  }
33
33
 
package/lib/to_string.cc CHANGED
@@ -20,6 +20,10 @@ NAN_METHOD(WrappedRE2::ToString)
20
20
  buffer += re2->source;
21
21
  buffer += "/";
22
22
 
23
+ if (re2->hasIndices)
24
+ {
25
+ buffer += "d";
26
+ }
23
27
  if (re2->global)
24
28
  {
25
29
  buffer += "g";
package/lib/wrapped_re2.h CHANGED
@@ -1,9 +1,12 @@
1
1
  #pragma once
2
2
 
3
+ #include <atomic>
3
4
  #include <string>
4
5
  #include <nan.h>
5
6
  #include <re2/re2.h>
6
7
 
8
+ #include "./isolate_data.h"
9
+
7
10
  struct StrVal
8
11
  {
9
12
  char *data;
@@ -86,8 +89,9 @@ public:
86
89
  static inline bool HasInstance(v8::Local<v8::Object> object)
87
90
  {
88
91
  auto isolate = v8::Isolate::GetCurrent();
89
- auto p_tpl = Nan::GetIsolateData<Nan::Persistent<v8::FunctionTemplate>>(isolate);
90
- return p_tpl->Get(isolate)->HasInstance(object);
92
+ auto data = getAddonData(isolate);
93
+ if (!data || data->re2Tpl.IsEmpty()) return false;
94
+ return data->re2Tpl.Get(isolate)->HasInstance(object);
91
95
  }
92
96
 
93
97
  enum UnicodeWarningLevels
@@ -97,8 +101,8 @@ public:
97
101
  WARN,
98
102
  THROW
99
103
  };
100
- static UnicodeWarningLevels unicodeWarningLevel;
101
- static bool alreadyWarnedAboutUnicode;
104
+ static std::atomic<UnicodeWarningLevels> unicodeWarningLevel;
105
+ static std::atomic<bool> alreadyWarnedAboutUnicode;
102
106
 
103
107
  re2::RE2 regexp;
104
108
  std::string source;
@@ -120,8 +124,6 @@ private:
120
124
  void dropCache();
121
125
  const StrVal &prepareArgument(const v8::Local<v8::Value> &arg, bool ignoreLastIndex = false);
122
126
  void doneWithLastString();
123
-
124
- friend struct PrepareLastString;
125
127
  };
126
128
 
127
129
  struct PrepareLastString
@@ -4,6 +4,8 @@
4
4
  #include <re2/re2.h>
5
5
  #include <re2/set.h>
6
6
 
7
+ #include "./isolate_data.h"
8
+
7
9
  #include <string>
8
10
  #include <vector>
9
11
 
@@ -14,7 +16,9 @@ public:
14
16
  static inline bool HasInstance(v8::Local<v8::Object> object)
15
17
  {
16
18
  auto isolate = v8::Isolate::GetCurrent();
17
- return !constructor.IsEmpty() && constructor.Get(isolate)->HasInstance(object);
19
+ auto data = getAddonData(isolate);
20
+ if (!data || data->re2SetTpl.IsEmpty()) return false;
21
+ return data->re2SetTpl.Get(isolate)->HasInstance(object);
18
22
  }
19
23
 
20
24
  private:
@@ -31,8 +35,6 @@ private:
31
35
  static NAN_GETTER(GetSize);
32
36
  static NAN_GETTER(GetAnchor);
33
37
 
34
- static Nan::Persistent<v8::FunctionTemplate> constructor;
35
-
36
38
  re2::RE2::Set set;
37
39
  std::vector<std::string> sources;
38
40
  std::string combinedSource;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "re2",
3
- "version": "1.23.3",
3
+ "version": "1.24.0",
4
4
  "description": "Bindings for RE2: fast, safe alternative to backtracking regular expression engines.",
5
5
  "homepage": "https://github.com/uhop/node-re2",
6
6
  "bugs": "https://github.com/uhop/node-re2/issues",
@@ -16,19 +16,21 @@
16
16
  ],
17
17
  "dependencies": {
18
18
  "install-artifact-from-github": "^1.4.0",
19
- "nan": "^2.25.0",
19
+ "nan": "^2.26.2",
20
20
  "node-gyp": "^12.2.0"
21
21
  },
22
22
  "devDependencies": {
23
- "@types/node": "^25.2.2",
24
- "nano-benchmark": "^1.0.9",
25
- "tape-six": "^1.7.0",
26
- "tape-six-proc": "^1.2.2",
27
- "typescript": "^5.9.3"
23
+ "@types/node": "^25.5.0",
24
+ "nano-benchmark": "^1.0.15",
25
+ "prettier": "^3.8.1",
26
+ "tape-six": "^1.7.13",
27
+ "tape-six-proc": "^1.2.8",
28
+ "typescript": "^6.0.2"
28
29
  },
29
30
  "scripts": {
30
- "test": "tape6-proc --flags FO",
31
+ "test": "tape6 --flags FO",
31
32
  "test:seq": "tape6-seq --flags FO",
33
+ "test:proc": "tape6-proc --flags FO",
32
34
  "save-to-github": "save-to-github-cache --artifact build/Release/re2.node",
33
35
  "install": "install-from-cache --artifact build/Release/re2.node --host-var RE2_DOWNLOAD_MIRROR --skip-path-var RE2_DOWNLOAD_SKIP_PATH --skip-ver-var RE2_DOWNLOAD_SKIP_VER || node-gyp -j max rebuild",
34
36
  "verify-build": "node scripts/verify-build.js",
@@ -56,11 +58,12 @@
56
58
  "PCRE alternative"
57
59
  ],
58
60
  "author": "Eugene Lazutkin <eugene.lazutkin@gmail.com> (https://lazutkin.com/)",
61
+ "funding": "https://github.com/sponsors/uhop",
59
62
  "license": "BSD-3-Clause",
60
63
  "tape6": {
61
64
  "tests": [
62
- "/tests/test-*.mjs",
63
- "/tests/test-*.ts"
65
+ "/tests/test-*.*js",
66
+ "/tests/test-*.*ts"
64
67
  ]
65
68
  }
66
69
  }
package/re2.d.ts CHANGED
@@ -1,3 +1,5 @@
1
+ /// <reference types="node" />
2
+
1
3
  declare module 're2' {
2
4
  interface RE2BufferExecArray {
3
5
  index: number;
@@ -6,6 +8,7 @@ declare module 're2' {
6
8
  groups?: {
7
9
  [key: string]: Buffer;
8
10
  };
11
+ indices?: RegExpIndicesArray;
9
12
  }
10
13
 
11
14
  interface RE2BufferMatchArray {
@@ -18,6 +21,7 @@ declare module 're2' {
18
21
  }
19
22
 
20
23
  interface RE2 extends RegExp {
24
+ readonly internalSource: string;
21
25
  exec(str: string): RegExpExecArray | null;
22
26
  exec(str: Buffer): RE2BufferExecArray | null;
23
27
 
@@ -82,6 +86,7 @@ declare module 're2' {
82
86
  getUtf16Length(value: Buffer): number;
83
87
 
84
88
  Set: RE2SetConstructor;
89
+ RE2: RE2Constructor;
85
90
  }
86
91
 
87
92
  var RE2: RE2Constructor;
package/re2.js CHANGED
@@ -37,3 +37,4 @@ RE2.prototype[Symbol.matchAll] = function* (str) {
37
37
  };
38
38
 
39
39
  module.exports = RE2;
40
+ module.exports.RE2 = RE2;