re2js 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -2
- package/build/index.cjs.cjs +11 -55
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +5 -11
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +11 -55
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +11 -55
- package/build/index.umd.js.map +1 -1
- package/package.json +21 -21
package/README.md
CHANGED
|
@@ -24,11 +24,14 @@ There are certain features of PCRE or Perl regular expressions that cannot be im
|
|
|
24
24
|
To install RE2JS:
|
|
25
25
|
|
|
26
26
|
```bash
|
|
27
|
+
# npm
|
|
27
28
|
npm install re2js
|
|
28
|
-
#
|
|
29
|
+
# yarn
|
|
29
30
|
yarn add re2js
|
|
30
|
-
#
|
|
31
|
+
# pnpm
|
|
31
32
|
pnpm add re2js
|
|
33
|
+
# bun
|
|
34
|
+
bun add re2js
|
|
32
35
|
```
|
|
33
36
|
|
|
34
37
|
## Usage
|
package/build/index.cjs.cjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v0.4.0
|
|
5
|
+
* @version v0.4.1
|
|
6
6
|
* @author Alexey Vasiliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -716,8 +716,7 @@ class MatcherInputBase {
|
|
|
716
716
|
}
|
|
717
717
|
}
|
|
718
718
|
class Utf8MatcherInput extends MatcherInputBase {
|
|
719
|
-
constructor() {
|
|
720
|
-
let bytes = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
|
|
719
|
+
constructor(bytes = null) {
|
|
721
720
|
super();
|
|
722
721
|
this.bytes = bytes;
|
|
723
722
|
}
|
|
@@ -749,8 +748,7 @@ class Utf8MatcherInput extends MatcherInputBase {
|
|
|
749
748
|
}
|
|
750
749
|
}
|
|
751
750
|
class Utf16MatcherInput extends MatcherInputBase {
|
|
752
|
-
constructor() {
|
|
753
|
-
let charSequence = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null;
|
|
751
|
+
constructor(charSequence = null) {
|
|
754
752
|
super();
|
|
755
753
|
this.charSequence = charSequence;
|
|
756
754
|
}
|
|
@@ -874,7 +872,6 @@ class RE2JSFlagsException extends RE2JSException {
|
|
|
874
872
|
/**
|
|
875
873
|
* A stateful iterator that interprets a regex {@code RE2JS} on a specific input.
|
|
876
874
|
*
|
|
877
|
-
* <p>
|
|
878
875
|
* Conceptually, a Matcher consists of four parts:
|
|
879
876
|
* <ol>
|
|
880
877
|
* <li>A compiled regular expression {@code RE2JS}, set at construction and fixed for the lifetime
|
|
@@ -1168,12 +1165,10 @@ class Matcher {
|
|
|
1168
1165
|
* the form {@code $n}, where {@code n} is the group number in decimal. It advances the append
|
|
1169
1166
|
* position to where the most recent match ended.
|
|
1170
1167
|
*
|
|
1171
|
-
* <p>
|
|
1172
1168
|
* To embed a literal {@code $}, use \$ (actually {@code "\\$"} with string escapes). The escape
|
|
1173
1169
|
* is only necessary when {@code $} is followed by a digit, but it is always allowed. Only
|
|
1174
1170
|
* {@code $} and {@code \} need escaping, but any character can be escaped.
|
|
1175
1171
|
*
|
|
1176
|
-
* <p>
|
|
1177
1172
|
* The group number {@code n} in {@code $n} is always at least one digit and expands to use more
|
|
1178
1173
|
* digits as long as the resulting number is a valid group number for this pattern. To cut it off
|
|
1179
1174
|
* earlier, escape the first digit that should not be used.
|
|
@@ -1431,9 +1426,7 @@ class MachineInputBase {
|
|
|
1431
1426
|
// An implementation of MachineInput for UTF-8 byte arrays.
|
|
1432
1427
|
// |pos| and |width| are byte indices.
|
|
1433
1428
|
class MachineUTF8Input extends MachineInputBase {
|
|
1434
|
-
constructor(bytes) {
|
|
1435
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1436
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : bytes.length;
|
|
1429
|
+
constructor(bytes, start = 0, end = bytes.length) {
|
|
1437
1430
|
super();
|
|
1438
1431
|
this.bytes = bytes;
|
|
1439
1432
|
this.start = start;
|
|
@@ -1516,8 +1509,7 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
1516
1509
|
|
|
1517
1510
|
// Returns the index of the first occurrence of array |target| within
|
|
1518
1511
|
// array |source| after |fromIndex|, or -1 if not found.
|
|
1519
|
-
indexOf(source, target) {
|
|
1520
|
-
let fromIndex = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0;
|
|
1512
|
+
indexOf(source, target, fromIndex = 0) {
|
|
1521
1513
|
let targetLength = target.length;
|
|
1522
1514
|
if (targetLength === 0) {
|
|
1523
1515
|
return -1;
|
|
@@ -1538,9 +1530,7 @@ class MachineUTF8Input extends MachineInputBase {
|
|
|
1538
1530
|
|
|
1539
1531
|
// |pos| and |width| are in JS "char" units.
|
|
1540
1532
|
class MachineUTF16Input extends MachineInputBase {
|
|
1541
|
-
constructor(charSequence) {
|
|
1542
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1543
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : charSequence.length;
|
|
1533
|
+
constructor(charSequence, start = 0, end = charSequence.length) {
|
|
1544
1534
|
super();
|
|
1545
1535
|
this.charSequence = charSequence;
|
|
1546
1536
|
this.start = start;
|
|
@@ -1580,14 +1570,10 @@ class MachineUTF16Input extends MachineInputBase {
|
|
|
1580
1570
|
}
|
|
1581
1571
|
}
|
|
1582
1572
|
class MachineInput {
|
|
1583
|
-
static fromUTF8(bytes) {
|
|
1584
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1585
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : bytes.length;
|
|
1573
|
+
static fromUTF8(bytes, start = 0, end = bytes.length) {
|
|
1586
1574
|
return new MachineUTF8Input(bytes, start, end);
|
|
1587
1575
|
}
|
|
1588
|
-
static fromUTF16(charSequence) {
|
|
1589
|
-
let start = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
1590
|
-
let end = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : charSequence.length;
|
|
1576
|
+
static fromUTF16(charSequence, start = 0, end = charSequence.length) {
|
|
1591
1577
|
return new MachineUTF16Input(charSequence, start, end);
|
|
1592
1578
|
}
|
|
1593
1579
|
}
|
|
@@ -1675,7 +1661,6 @@ class Regexp {
|
|
|
1675
1661
|
this.name = null; // capturing name, for CAPTURE
|
|
1676
1662
|
this.namedGroups = {}; // map of group name -> capturing index
|
|
1677
1663
|
}
|
|
1678
|
-
|
|
1679
1664
|
reinit() {
|
|
1680
1665
|
this.flags = 0;
|
|
1681
1666
|
this.subs = Regexp.emptySubs();
|
|
@@ -4105,7 +4090,6 @@ class Parser {
|
|
|
4105
4090
|
if (this.swapVerticalBar()) {
|
|
4106
4091
|
this.pop(); // pop vertical bar
|
|
4107
4092
|
}
|
|
4108
|
-
|
|
4109
4093
|
this.alternate();
|
|
4110
4094
|
const n = this.stack.length;
|
|
4111
4095
|
if (n !== 1) {
|
|
@@ -4279,7 +4263,6 @@ class Parser {
|
|
|
4279
4263
|
if (this.swapVerticalBar()) {
|
|
4280
4264
|
this.pop(); // pop vertical bar
|
|
4281
4265
|
}
|
|
4282
|
-
|
|
4283
4266
|
this.alternate();
|
|
4284
4267
|
const n = this.stack.length;
|
|
4285
4268
|
if (n < 2) {
|
|
@@ -4865,10 +4848,8 @@ class AtomicReference {
|
|
|
4865
4848
|
* An RE2 class instance is a compiled representation of an RE2 regular expression, independent of
|
|
4866
4849
|
* the public Java-like Pattern/Matcher API.
|
|
4867
4850
|
*
|
|
4868
|
-
* <p>
|
|
4869
4851
|
* This class also contains various implementation helpers for RE2 regular expressions.
|
|
4870
4852
|
*
|
|
4871
|
-
* <p>
|
|
4872
4853
|
* Use the {@link #quoteMeta(String)} utility function to quote all regular expression
|
|
4873
4854
|
* metacharacters in an arbitrary string.
|
|
4874
4855
|
*
|
|
@@ -4890,7 +4871,6 @@ class RE2 {
|
|
|
4890
4871
|
* Parses a regular expression and returns, if successful, an {@code RE2} instance that can be
|
|
4891
4872
|
* used to match against text.
|
|
4892
4873
|
*
|
|
4893
|
-
* <p>
|
|
4894
4874
|
* When matching against text, the regexp returns a match that begins as early as possible in the
|
|
4895
4875
|
* input (leftmost), and among those it chooses the one that a backtracking search would have
|
|
4896
4876
|
* found first. This so-called leftmost-first matching is the same semantics that Perl, Python,
|
|
@@ -4905,13 +4885,11 @@ class RE2 {
|
|
|
4905
4885
|
* {@code compilePOSIX} is like {@link #compile} but restricts the regular expression to POSIX ERE
|
|
4906
4886
|
* (egrep) syntax and changes the match semantics to leftmost-longest.
|
|
4907
4887
|
*
|
|
4908
|
-
* <p>
|
|
4909
4888
|
* That is, when matching against text, the regexp returns a match that begins as early as
|
|
4910
4889
|
* possible in the input (leftmost), and among those it chooses a match that is as long as
|
|
4911
4890
|
* possible. This so-called leftmost-longest matching is the same semantics that early regular
|
|
4912
4891
|
* expression implementations used and that POSIX specifies.
|
|
4913
4892
|
*
|
|
4914
|
-
* <p>
|
|
4915
4893
|
* However, there can be multiple leftmost-longest matches, with different submatch choices, and
|
|
4916
4894
|
* here this package diverges from POSIX. Among the possible leftmost-longest matches, this
|
|
4917
4895
|
* package chooses the one that a backtracking search would have found first, while POSIX
|
|
@@ -4944,7 +4922,6 @@ class RE2 {
|
|
|
4944
4922
|
/**
|
|
4945
4923
|
* Returns true iff textual regular expression {@code pattern} matches string {@code s}.
|
|
4946
4924
|
*
|
|
4947
|
-
* <p>
|
|
4948
4925
|
* More complicated queries need to use {@link #compile} and the full {@code RE2} interface.
|
|
4949
4926
|
*/
|
|
4950
4927
|
// This is visible for testing.
|
|
@@ -5262,7 +5239,6 @@ class RE2 {
|
|
|
5262
5239
|
* Returns an array holding the text of the leftmost match in {@code b} of this regular
|
|
5263
5240
|
* expression.
|
|
5264
5241
|
*
|
|
5265
|
-
* <p>
|
|
5266
5242
|
* A return value of null indicates no match.
|
|
5267
5243
|
*/
|
|
5268
5244
|
// This is visible for testing.
|
|
@@ -5278,7 +5254,6 @@ class RE2 {
|
|
|
5278
5254
|
* Returns a two-element array of integers defining the location of the leftmost match in
|
|
5279
5255
|
* {@code b} of this regular expression. The match itself is at {@code b[loc[0]...loc[1]]}.
|
|
5280
5256
|
*
|
|
5281
|
-
* <p>
|
|
5282
5257
|
* A return value of null indicates no match.
|
|
5283
5258
|
*/
|
|
5284
5259
|
// This is visible for testing.
|
|
@@ -5294,7 +5269,6 @@ class RE2 {
|
|
|
5294
5269
|
* Returns a string holding the text of the leftmost match in {@code s} of this regular
|
|
5295
5270
|
* expression.
|
|
5296
5271
|
*
|
|
5297
|
-
* <p>
|
|
5298
5272
|
* If there is no match, the return value is an empty string, but it will also be empty if the
|
|
5299
5273
|
* regular expression successfully matches an empty string. Use {@link #findIndex} or
|
|
5300
5274
|
* {@link #findSubmatch} if it is necessary to distinguish these cases.
|
|
@@ -5313,7 +5287,6 @@ class RE2 {
|
|
|
5313
5287
|
* {@code s} of this regular expression. The match itself is at
|
|
5314
5288
|
* {@code s.substring(loc[0], loc[1])}.
|
|
5315
5289
|
*
|
|
5316
|
-
* <p>
|
|
5317
5290
|
* A return value of null indicates no match.
|
|
5318
5291
|
*/
|
|
5319
5292
|
// This is visible for testing.
|
|
@@ -5326,7 +5299,6 @@ class RE2 {
|
|
|
5326
5299
|
* {@code b} and the matches, if any, of its subexpressions, as defined by the <a
|
|
5327
5300
|
* href='#submatch'>Submatch</a> description above.
|
|
5328
5301
|
*
|
|
5329
|
-
* <p>
|
|
5330
5302
|
* A return value of null indicates no match.
|
|
5331
5303
|
*/
|
|
5332
5304
|
// This is visible for testing.
|
|
@@ -5349,7 +5321,6 @@ class RE2 {
|
|
|
5349
5321
|
* expression in {@code b} and the matches, if any, of its subexpressions, as defined by the
|
|
5350
5322
|
* <a href='#submatch'>Submatch</a> and <a href='#index'>Index</a> descriptions above.
|
|
5351
5323
|
*
|
|
5352
|
-
* <p>
|
|
5353
5324
|
* A return value of null indicates no match.
|
|
5354
5325
|
*/
|
|
5355
5326
|
// This is visible for testing.
|
|
@@ -5362,7 +5333,6 @@ class RE2 {
|
|
|
5362
5333
|
* {@code s} and the matches, if any, of its subexpressions, as defined by the <a
|
|
5363
5334
|
* href='#submatch'>Submatch</a> description above.
|
|
5364
5335
|
*
|
|
5365
|
-
* <p>
|
|
5366
5336
|
* A return value of null indicates no match.
|
|
5367
5337
|
*/
|
|
5368
5338
|
// This is visible for testing.
|
|
@@ -5385,7 +5355,6 @@ class RE2 {
|
|
|
5385
5355
|
* expression in {@code s} and the matches, if any, of its subexpressions, as defined by the <a
|
|
5386
5356
|
* href='#submatch'>Submatch</a> description above.
|
|
5387
5357
|
*
|
|
5388
|
-
* <p>
|
|
5389
5358
|
* A return value of null indicates no match.
|
|
5390
5359
|
*/
|
|
5391
5360
|
// This is visible for testing.
|
|
@@ -5398,7 +5367,6 @@ class RE2 {
|
|
|
5398
5367
|
* list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5399
5368
|
* href='#all'>All</a> description above.
|
|
5400
5369
|
*
|
|
5401
|
-
* <p>
|
|
5402
5370
|
* A return value of null indicates no match.
|
|
5403
5371
|
*
|
|
5404
5372
|
* TODO(adonovan): think about defining a byte slice view class, like a read-only Go slice backed
|
|
@@ -5418,7 +5386,6 @@ class RE2 {
|
|
|
5418
5386
|
* returns a list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5419
5387
|
* href='#all'>All</a> description above.
|
|
5420
5388
|
*
|
|
5421
|
-
* <p>
|
|
5422
5389
|
* A return value of null indicates no match.
|
|
5423
5390
|
*/
|
|
5424
5391
|
// This is visible for testing.
|
|
@@ -5435,7 +5402,6 @@ class RE2 {
|
|
|
5435
5402
|
* to {@code n} successive matches of the expression, as defined by the <a href='#all'>All</a>
|
|
5436
5403
|
* description above.
|
|
5437
5404
|
*
|
|
5438
|
-
* <p>
|
|
5439
5405
|
* A return value of null indicates no match.
|
|
5440
5406
|
*/
|
|
5441
5407
|
// This is visible for testing.
|
|
@@ -5452,7 +5418,6 @@ class RE2 {
|
|
|
5452
5418
|
* list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5453
5419
|
* href='#all'>All</a> description above.
|
|
5454
5420
|
*
|
|
5455
|
-
* <p>
|
|
5456
5421
|
* A return value of null indicates no match.
|
|
5457
5422
|
*/
|
|
5458
5423
|
// This is visible for testing.
|
|
@@ -5469,7 +5434,6 @@ class RE2 {
|
|
|
5469
5434
|
* it returns a list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5470
5435
|
* href='#all'>All</a> description above.
|
|
5471
5436
|
*
|
|
5472
|
-
* <p>
|
|
5473
5437
|
* A return value of null indicates no match.
|
|
5474
5438
|
*/
|
|
5475
5439
|
// This is visible for testing.
|
|
@@ -5494,7 +5458,6 @@ class RE2 {
|
|
|
5494
5458
|
* {@link #findUTF8SubmatchIndex}; it returns a list of up to {@code n} successive matches of the
|
|
5495
5459
|
* expression, as defined by the <a href='#all'>All</a> description above.
|
|
5496
5460
|
*
|
|
5497
|
-
* <p>
|
|
5498
5461
|
* A return value of null indicates no match.
|
|
5499
5462
|
*/
|
|
5500
5463
|
// This is visible for testing.
|
|
@@ -5511,7 +5474,6 @@ class RE2 {
|
|
|
5511
5474
|
* returns a list of up to {@code n} successive matches of the expression, as defined by the <a
|
|
5512
5475
|
* href='#all'>All</a> description above.
|
|
5513
5476
|
*
|
|
5514
|
-
* <p>
|
|
5515
5477
|
* A return value of null indicates no match.
|
|
5516
5478
|
*/
|
|
5517
5479
|
// This is visible for testing.
|
|
@@ -5536,7 +5498,6 @@ class RE2 {
|
|
|
5536
5498
|
* {@link #findSubmatchIndex}; it returns a list of up to {@code n} successive matches of the
|
|
5537
5499
|
* expression, as defined by the <a href='#all'>All</a> description above.
|
|
5538
5500
|
*
|
|
5539
|
-
* <p>
|
|
5540
5501
|
* A return value of null indicates no match.
|
|
5541
5502
|
*/
|
|
5542
5503
|
// This is visible for testing.
|
|
@@ -5552,7 +5513,6 @@ class RE2 {
|
|
|
5552
5513
|
/**
|
|
5553
5514
|
* A compiled representation of an RE2 regular expression
|
|
5554
5515
|
*
|
|
5555
|
-
* <p>
|
|
5556
5516
|
* The matching functions take {@code String} arguments instead of the more general Java
|
|
5557
5517
|
* {@code CharSequence} since the latter doesn't provide UTF-16 decoding.
|
|
5558
5518
|
*
|
|
@@ -5586,10 +5546,9 @@ class RE2JS {
|
|
|
5586
5546
|
/**
|
|
5587
5547
|
* Returns a literal pattern string for the specified string.
|
|
5588
5548
|
*
|
|
5589
|
-
* <p>
|
|
5590
5549
|
* This method produces a string that can be used to create a <code>RE2JS</code> that would
|
|
5591
5550
|
* match the string <code>s</code> as if it were a literal pattern.
|
|
5592
|
-
*
|
|
5551
|
+
*
|
|
5593
5552
|
* Metacharacters or escape sequences in the input sequence will be given no special meaning.
|
|
5594
5553
|
*
|
|
5595
5554
|
* @param {string} str The string to be literalized
|
|
@@ -5605,8 +5564,7 @@ class RE2JS {
|
|
|
5605
5564
|
* @param {number} [flags=0]
|
|
5606
5565
|
* @returns {RE2JS}
|
|
5607
5566
|
*/
|
|
5608
|
-
static compile(regex) {
|
|
5609
|
-
let flags = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
5567
|
+
static compile(regex, flags = 0) {
|
|
5610
5568
|
let fregex = regex;
|
|
5611
5569
|
if ((flags & RE2JS.CASE_INSENSITIVE) !== 0) {
|
|
5612
5570
|
fregex = `(?i)${fregex}`;
|
|
@@ -5725,7 +5683,6 @@ class RE2JS {
|
|
|
5725
5683
|
* Splits input around instances of the regular expression. It returns an array giving the strings
|
|
5726
5684
|
* that occur before, between, and after instances of the regular expression.
|
|
5727
5685
|
*
|
|
5728
|
-
* <p>
|
|
5729
5686
|
* If {@code limit <= 0}, there is no limit on the size of the returned array. If
|
|
5730
5687
|
* {@code limit == 0}, empty strings that would occur at the end of the array are omitted. If
|
|
5731
5688
|
* {@code limit > 0}, at most limit strings are returned. The final string contains the remainder
|
|
@@ -5735,8 +5692,7 @@ class RE2JS {
|
|
|
5735
5692
|
* @param {number} [limit=0] the limit
|
|
5736
5693
|
* @returns {string[]} the split strings
|
|
5737
5694
|
*/
|
|
5738
|
-
split(input) {
|
|
5739
|
-
let limit = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 0;
|
|
5695
|
+
split(input, limit = 0) {
|
|
5740
5696
|
const m = this.matcher(input);
|
|
5741
5697
|
const result = [];
|
|
5742
5698
|
let emptiesSkipped = 0;
|