re2js 2.5.0 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -11
- package/build/index.cjs.cjs +44 -1
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +8 -0
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +44 -1
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +44 -1
- package/build/index.umd.js.map +1 -1
- package/package.json +13 -8
package/README.md
CHANGED
|
@@ -228,6 +228,33 @@ matchString.group() // 'e'
|
|
|
228
228
|
matchString.find(7) // false
|
|
229
229
|
```
|
|
230
230
|
|
|
231
|
+
### Iterating Over Matches (`matchAll`)
|
|
232
|
+
|
|
233
|
+
For a more modern, JavaScript-native developer experience, RE2JS provides a `.matchAll()` method. This returns an ES6 `IterableIterator`, allowing you to safely and cleanly iterate over matches using `for...of` loops or the array spread operator `[...]`.
|
|
234
|
+
|
|
235
|
+
Unlike native `RegExp` objects with the `/g` flag, RE2JS is completely stateless. This means you don't have to worry about `.lastIndex` bugs—you can iterate over the same regex instance as many times as you want safely.
|
|
236
|
+
|
|
237
|
+
The yielded match arrays perfectly mirror the shape of native JavaScript regex matches. They include `.index`, `.input`, and `.groups` properties, and properly map unmatched capture groups to `undefined`.
|
|
238
|
+
|
|
239
|
+
```js
|
|
240
|
+
import { RE2JS } from 're2js'
|
|
241
|
+
|
|
242
|
+
const re2 = RE2JS.compile('(?P<year>\\d{4})-(?P<month>\\d{2})')
|
|
243
|
+
const input = 'Dates: 2024-05 and 2025-11.'
|
|
244
|
+
|
|
245
|
+
// Native ES6 Iteration
|
|
246
|
+
for (const match of re2.matchAll(input)) {
|
|
247
|
+
console.log(match[0]); // "2024-05", then "2025-11"
|
|
248
|
+
console.log(match.index); // 7, then 19
|
|
249
|
+
console.log(match.groups); // { year: '2024', month: '05' } then { year: '2025', month: '11' }
|
|
250
|
+
console.log(match.groups.year); // "2024", then "2025"
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
// Or easily collect all matches into an array
|
|
254
|
+
const allMatches = [...re2.matchAll(input)];
|
|
255
|
+
console.log(allMatches.length); // 2
|
|
256
|
+
```
|
|
257
|
+
|
|
231
258
|
### Multi-Pattern Matching (RE2Set)
|
|
232
259
|
|
|
233
260
|
RE2JS includes a highly optimized `RE2Set` API that allows you to match multiple regular expressions against a single string simultaneously. Instead of running 100 different regexes in a loop ($O(100n)$ time), `RE2Set` compiles them into a single state machine and finds all matches in a single pass ($O(n)$ linear time).
|
|
@@ -631,23 +658,23 @@ Should you require maximum absolute performance on the server side when using RE
|
|
|
631
658
|
|
|
632
659
|
Because RE2JS's Lazy DFA, Prefilter, and OnePass engines operate efficiently within V8's Just-In-Time (JIT) compiler, they can outperform native C++ bindings (`re2-node`) for many operations by avoiding the cross-boundary serialization costs between JavaScript and C++.
|
|
633
660
|
|
|
634
|
-
Here is a benchmark running 30,000 items through both engines using their respective `.test()` fast-paths (averages of multiple runs):
|
|
661
|
+
Here is a [benchmark running 30,000 items](src/__tests__/re2.bench.js) through both engines using their respective `.test()` fast-paths (averages of multiple runs):
|
|
635
662
|
|
|
636
663
|
| Benchmark Scenario | Pattern Example | RE2JS (Pure JS) | RE2-Node (C++) | Result |
|
|
637
664
|
|:--------------------------|:---------------------------|:----------------|:---------------|:----------------------------|
|
|
638
|
-
| **ReDoS Attempt** | `/(a+)+!/` | **
|
|
639
|
-
| **
|
|
640
|
-
| **
|
|
641
|
-
| **Lazy Wildcard** | `/enters.*?battlefield/` | **9.
|
|
642
|
-
| **Greedy Wildcard** | `/enters.*battlefield/` | **9.
|
|
643
|
-
| **Massive Alternation** | `/White\|Blue\|Black.../` | 11.
|
|
644
|
-
| **Bounded Repetition** | `/[A-Z][a-z]{5,15}/` |
|
|
645
|
-
| **Case Insensitive** | `/(?i)swamp/` |
|
|
646
|
-
| **Word Boundaries (NFA)** | `/\b(Flying\|First...)\b/` |
|
|
665
|
+
| **ReDoS Attempt** | `/(a+)+!/` | **2.16 ms** | 15.94 ms | `re2js` is **7.38x** faster |
|
|
666
|
+
| **Simple Literal** | `/damage/` | **2.58 ms** | 12.39 ms | `re2js` is **4.80x** faster |
|
|
667
|
+
| **Deep State Machine** | `/([0-9]+(/[0-9]+)+)/` | **11.20 ms** | 15.76 ms | `re2js` is **1.41x** faster |
|
|
668
|
+
| **Lazy Wildcard** | `/enters.*?battlefield/` | **9.78 ms** | 12.99 ms | `re2js` is **1.33x** faster |
|
|
669
|
+
| **Greedy Wildcard** | `/enters.*battlefield/` | **9.90 ms** | 12.99 ms | `re2js` is **1.31x** faster |
|
|
670
|
+
| **Massive Alternation** | `/White\|Blue\|Black.../` | **11.31 ms** | 14.81 ms | `re2js` is **1.31x** faster |
|
|
671
|
+
| **Bounded Repetition** | `/[A-Z][a-z]{5,15}/` | 28.20 ms | **13.60 ms** | `re2-node` is 2.07x faster |
|
|
672
|
+
| **Case Insensitive** | `/(?i)swamp/` | 56.41 ms | **16.13 ms** | `re2-node` is 3.50x faster |
|
|
673
|
+
| **Word Boundaries (NFA)** | `/\b(Flying\|First...)\b/` | 107.12 ms | **15.41 ms** | `re2-node` is 6.95x faster |
|
|
647
674
|
|
|
648
675
|
**Takeaways:**
|
|
649
676
|
* **Pure JS Strengths:** For complex state tracking (nested groups, wildcards) and literal string scanning, `re2js` actually beats the native C++ bindings. V8's Turbofan JIT compiler is able to heavily optimize the Pure JS DFA loop, bypassing the C++ boundary entirely.
|
|
650
|
-
* **C++ Strengths:** For character class evaluations (Case Insensitivity, Bounded Repetitions
|
|
677
|
+
* **C++ Strengths:** For character class evaluations (Case Insensitivity, Bounded Repetitions), `re2-node` has a clear edge (roughly 2x–3.5x faster in the table above) thanks to highly optimized, hardware-level memory tables.
|
|
651
678
|
* **The NFA Fallback:** Pure DFA engines mathematically cannot track look-behind context like Word Boundaries (`\b`). When RE2JS encounters these, it safely bails out to its NFA engine. As shown in the benchmarks, the pure JS NFA is significantly slower than the C++ NFA. **For maximum performance in RE2JS, avoid `\b` when doing bulk boolean `.test()` matching.**
|
|
652
679
|
|
|
653
680
|
|
package/build/index.cjs.cjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.5.0
|
|
5
|
+
* @version v2.6.0
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -8654,6 +8654,47 @@ class RE2JS {
|
|
|
8654
8654
|
return result;
|
|
8655
8655
|
}
|
|
8656
8656
|
|
|
8657
|
+
/**
|
|
8658
|
+
* Returns an iterator of all results matching a string against the regular expression,
|
|
8659
|
+
* including capturing groups.
|
|
8660
|
+
*
|
|
8661
|
+
* @param {string|number[]|Uint8Array} input the input string or byte array
|
|
8662
|
+
* @returns {IterableIterator<Array>}
|
|
8663
|
+
*/
|
|
8664
|
+
*matchAll(input) {
|
|
8665
|
+
const m = this.matcher(input);
|
|
8666
|
+
const inputStr = typeof input === 'string' ? input : m.matcherInput.asCharSequence();
|
|
8667
|
+
while (m.find()) {
|
|
8668
|
+
// Build the match array starting with the full match
|
|
8669
|
+
const result = [m.group(0)];
|
|
8670
|
+
|
|
8671
|
+
// Append all capture groups, mapping unmatched groups (null) to undefined (written as void 0)
|
|
8672
|
+
for (let i = 1; i <= m.groupCount(); i++) {
|
|
8673
|
+
const groupVal = m.group(i);
|
|
8674
|
+
result.push(groupVal === null ? void 0 : groupVal);
|
|
8675
|
+
}
|
|
8676
|
+
|
|
8677
|
+
// Attach native RegExp match properties
|
|
8678
|
+
result.index = m.start(0);
|
|
8679
|
+
result.input = inputStr;
|
|
8680
|
+
|
|
8681
|
+
// Attach named capture groups if they exist
|
|
8682
|
+
const namedGroups = this.namedGroups();
|
|
8683
|
+
if (Object.keys(namedGroups).length > 0) {
|
|
8684
|
+
const parsedGroups = m.getNamedGroups();
|
|
8685
|
+
for (const key in parsedGroups) {
|
|
8686
|
+
if (parsedGroups[key] === null) {
|
|
8687
|
+
parsedGroups[key] = void 0;
|
|
8688
|
+
}
|
|
8689
|
+
}
|
|
8690
|
+
result.groups = parsedGroups;
|
|
8691
|
+
} else {
|
|
8692
|
+
result.groups = void 0;
|
|
8693
|
+
}
|
|
8694
|
+
yield result;
|
|
8695
|
+
}
|
|
8696
|
+
}
|
|
8697
|
+
|
|
8657
8698
|
/**
|
|
8658
8699
|
*
|
|
8659
8700
|
* @returns {string}
|
|
@@ -8710,6 +8751,8 @@ class RE2JS {
|
|
|
8710
8751
|
return this.flagsInput === other.flagsInput && this.patternInput === other.patternInput;
|
|
8711
8752
|
}
|
|
8712
8753
|
}
|
|
8754
|
+
|
|
8755
|
+
/* Small helper for Tagged Template Literals (No Double-Escaping) */
|
|
8713
8756
|
const re = (stringsOrFlags, ...values) => {
|
|
8714
8757
|
if (Array.isArray(stringsOrFlags) && stringsOrFlags.raw) {
|
|
8715
8758
|
const pattern = String.raw(stringsOrFlags, ...values);
|