re2js 2.2.3 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -2
- package/build/index.cjs.cjs +178 -25
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +56 -67
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +178 -25
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +178 -25
- package/build/index.umd.js.map +1 -1
- package/package.json +1 -1
package/build/index.esm.d.ts
CHANGED
|
@@ -33,9 +33,13 @@ export class Matcher {
|
|
|
33
33
|
/**
|
|
34
34
|
*
|
|
35
35
|
* @param {RE2JS} pattern
|
|
36
|
-
* @param {Utf8MatcherInput|Utf16MatcherInput|number[]|string} input
|
|
36
|
+
* @param {string|number[]|Uint8Array} input
|
|
37
|
+
*/
|
|
38
|
+
constructor(pattern: RE2JS, input: string | number[] | Uint8Array);
|
|
39
|
+
/**
|
|
40
|
+
* The pattern being matched.
|
|
41
|
+
* @type {RE2JS}
|
|
37
42
|
*/
|
|
38
|
-
constructor(pattern: RE2JS, input: Utf8MatcherInput | Utf16MatcherInput | number[] | string);
|
|
39
43
|
patternInput: RE2JS;
|
|
40
44
|
/** @type {number} */
|
|
41
45
|
patternGroupCount: number;
|
|
@@ -65,11 +69,11 @@ export class Matcher {
|
|
|
65
69
|
anchorFlag: number | undefined;
|
|
66
70
|
/**
|
|
67
71
|
* Resets the {@code Matcher} and changes the input.
|
|
68
|
-
* @param {
|
|
72
|
+
* @param {MatcherInputBase} input
|
|
69
73
|
* @returns {Matcher} the {@code Matcher} itself, for chained method calls
|
|
70
74
|
*/
|
|
71
|
-
resetMatcherInput(input:
|
|
72
|
-
matcherInput:
|
|
75
|
+
resetMatcherInput(input: MatcherInputBase): Matcher;
|
|
76
|
+
matcherInput: MatcherInputBase | undefined;
|
|
73
77
|
/**
|
|
74
78
|
* Returns the start of the named group of the most recent match, or -1 if the group was not
|
|
75
79
|
* matched.
|
|
@@ -98,7 +102,7 @@ export class Matcher {
|
|
|
98
102
|
/**
|
|
99
103
|
* Returns the named group of the most recent match, or {@code null} if the group was not matched.
|
|
100
104
|
* @param {string|number} [group=0]
|
|
101
|
-
* @returns {
|
|
105
|
+
* @returns {string|null}
|
|
102
106
|
*/
|
|
103
107
|
group(group?: string | number): string | null;
|
|
104
108
|
/**
|
|
@@ -137,11 +141,11 @@ export class Matcher {
|
|
|
137
141
|
* Matches the input against the pattern (unanchored), starting at a specified position. If there
|
|
138
142
|
* is a match, {@code find} sets the match state to describe it.
|
|
139
143
|
*
|
|
140
|
-
* @param {number} [start=null] the input position where the search begins
|
|
144
|
+
* @param {number|null} [start=null] the input position where the search begins
|
|
141
145
|
* @returns {boolean} if it finds a match
|
|
142
146
|
* @throws IndexOutOfBoundsException if start is not a valid input position
|
|
143
147
|
*/
|
|
144
|
-
find(start?: number): boolean;
|
|
148
|
+
find(start?: number | null): boolean;
|
|
145
149
|
/**
|
|
146
150
|
* Helper: does match starting at start, with RE2 anchor flag.
|
|
147
151
|
* @param {number} startByte
|
|
@@ -299,10 +303,10 @@ export class RE2JS {
|
|
|
299
303
|
* RE2JS-compatible syntax, and handling Unicode sequences properly. It ensures that the
|
|
300
304
|
* resulting regex is safe and properly formatted before compilation.
|
|
301
305
|
*
|
|
302
|
-
* @param {string} expr - The regular expression string to be translated.
|
|
306
|
+
* @param {string|RegExp} expr - The regular expression string or RegExp object to be translated.
|
|
303
307
|
* @returns {string} - The transformed regular expression string, ready for compilation.
|
|
304
308
|
*/
|
|
305
|
-
static translateRegExp(expr: string): string;
|
|
309
|
+
static translateRegExp(expr: string | RegExp): string;
|
|
306
310
|
/**
|
|
307
311
|
* Helper: create new RE2JS with given regex and flags. Flregex is the regex with flags applied.
|
|
308
312
|
* @param {string} regex
|
|
@@ -314,11 +318,11 @@ export class RE2JS {
|
|
|
314
318
|
* Matches a string against a regular expression.
|
|
315
319
|
*
|
|
316
320
|
* @param {string} regex the regular expression
|
|
317
|
-
* @param {string|number[]} input the input
|
|
321
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
318
322
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
319
323
|
* @throws RE2JSSyntaxException if the regular expression is malformed
|
|
320
324
|
*/
|
|
321
|
-
static matches(regex: string, input: string | number[]): boolean;
|
|
325
|
+
static matches(regex: string, input: string | number[] | Uint8Array): boolean;
|
|
322
326
|
/**
|
|
323
327
|
* This is visible for testing.
|
|
324
328
|
* @private
|
|
@@ -351,37 +355,37 @@ export class RE2JS {
|
|
|
351
355
|
/**
|
|
352
356
|
* Matches a string against a regular expression.
|
|
353
357
|
*
|
|
354
|
-
* @param {string|number[]} input the input
|
|
358
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
355
359
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
356
360
|
*/
|
|
357
|
-
matches(input: string | number[]): boolean;
|
|
361
|
+
matches(input: string | number[] | Uint8Array): boolean;
|
|
358
362
|
/**
|
|
359
363
|
* Creates a new {@code Matcher} matching the pattern against the input.
|
|
360
364
|
*
|
|
361
|
-
* @param {string|number[]} input the input string
|
|
365
|
+
* @param {string|number[]|Uint8Array} input the input string
|
|
362
366
|
* @returns {Matcher}
|
|
363
367
|
*/
|
|
364
|
-
matcher(input: string | number[]): Matcher;
|
|
368
|
+
matcher(input: string | number[] | Uint8Array): Matcher;
|
|
365
369
|
/**
|
|
366
370
|
* Tests whether the regular expression matches any part of the input string.
|
|
367
371
|
* Performance Note: This method is highly optimized. Because it only returns
|
|
368
372
|
* a boolean and does not extract capture groups, it bypasses the `Matcher` overhead
|
|
369
373
|
* and guarantees execution on the high-speed DFA engine whenever possible.
|
|
370
374
|
*
|
|
371
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
375
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
372
376
|
* @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
|
|
373
377
|
*/
|
|
374
|
-
test(input: string | number[]): boolean;
|
|
378
|
+
test(input: string | number[] | Uint8Array): boolean;
|
|
375
379
|
/**
|
|
376
380
|
* Tests whether the regular expression matches the ENTIRE input string.
|
|
377
381
|
* * **Performance Note:** This operates identically to `.matches()`, but is significantly
|
|
378
382
|
* faster because it does not request capture group data. By requesting 0 capture groups,
|
|
379
383
|
* it securely routes execution through the DFA fast-path.
|
|
380
384
|
*
|
|
381
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
385
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
382
386
|
* @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
|
|
383
387
|
*/
|
|
384
|
-
testExact(input: string | number[]): boolean;
|
|
388
|
+
testExact(input: string | number[] | Uint8Array): boolean;
|
|
385
389
|
/**
|
|
386
390
|
* Splits input around instances of the regular expression. It returns an array giving the strings
|
|
387
391
|
* that occur before, between, and after instances of the regular expression.
|
|
@@ -481,9 +485,17 @@ export class RE2JSSyntaxException extends RE2JSException {
|
|
|
481
485
|
getPattern(): string | null;
|
|
482
486
|
}
|
|
483
487
|
export class RE2Set {
|
|
488
|
+
/** @type {number} */
|
|
484
489
|
static UNANCHORED: number;
|
|
490
|
+
/** @type {number} */
|
|
485
491
|
static ANCHOR_START: number;
|
|
492
|
+
/** @type {number} */
|
|
486
493
|
static ANCHOR_BOTH: number;
|
|
494
|
+
/**
|
|
495
|
+
* Constructs a new RE2Set with the specified anchor mode and flags.
|
|
496
|
+
* @param {number} [anchor=RE2Set.UNANCHORED] - The anchoring mode (e.g., RE2Set.UNANCHORED).
|
|
497
|
+
* @param {number} [flags=0] - The public flags to apply to all patterns in the set.
|
|
498
|
+
*/
|
|
487
499
|
constructor(anchor?: number, flags?: number);
|
|
488
500
|
anchor: number;
|
|
489
501
|
jsFlags: number;
|
|
@@ -498,49 +510,43 @@ export class RE2Set {
|
|
|
498
510
|
prefixRune: number;
|
|
499
511
|
longest: boolean;
|
|
500
512
|
} | null;
|
|
501
|
-
add(pattern: any): number;
|
|
502
|
-
compile(): void;
|
|
503
|
-
match(input: any): any[];
|
|
504
|
-
}
|
|
505
|
-
declare class Utf8MatcherInput extends MatcherInputBase {
|
|
506
|
-
constructor(bytes?: null);
|
|
507
|
-
bytes: any;
|
|
508
|
-
getEncoding(): any;
|
|
509
513
|
/**
|
|
510
|
-
*
|
|
511
|
-
*
|
|
514
|
+
* Adds a new regular expression pattern to the set.
|
|
515
|
+
* Patterns cannot be added after the set has been compiled.
|
|
516
|
+
* @param {string} pattern - The regular expression pattern to add.
|
|
517
|
+
* @returns {number} The integer index assigned to the added pattern.
|
|
518
|
+
* @throws {RE2JSCompileException} If patterns are added after compilation.
|
|
512
519
|
*/
|
|
513
|
-
|
|
520
|
+
add(pattern: string): number;
|
|
514
521
|
/**
|
|
515
|
-
*
|
|
516
|
-
*
|
|
522
|
+
* Compiles the added patterns into a single state machine.
|
|
523
|
+
* This is automatically called on the first match if not called explicitly.
|
|
524
|
+
* @returns {void}
|
|
517
525
|
*/
|
|
518
|
-
|
|
526
|
+
compile(): void;
|
|
519
527
|
/**
|
|
520
|
-
*
|
|
521
|
-
* @
|
|
528
|
+
* Matches the input against the compiled set of regular expressions.
|
|
529
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to match against.
|
|
530
|
+
* @returns {number[]} An array of indices representing the patterns that successfully matched the input.
|
|
522
531
|
*/
|
|
523
|
-
|
|
532
|
+
match(input: string | number[] | Uint8Array): number[];
|
|
524
533
|
}
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
* @returns {string}
|
|
532
|
-
*/
|
|
533
|
-
asCharSequence(): string;
|
|
534
|
+
/**
|
|
535
|
+
* Abstract the representations of input text supplied to Matcher.
|
|
536
|
+
*/
|
|
537
|
+
declare class MatcherInputBase {
|
|
538
|
+
static Encoding: any;
|
|
539
|
+
getEncoding(): void;
|
|
534
540
|
/**
|
|
535
541
|
*
|
|
536
|
-
* @returns {
|
|
542
|
+
* @returns {boolean}
|
|
537
543
|
*/
|
|
538
|
-
|
|
544
|
+
isUTF8Encoding(): boolean;
|
|
539
545
|
/**
|
|
540
546
|
*
|
|
541
|
-
* @returns {
|
|
547
|
+
* @returns {boolean}
|
|
542
548
|
*/
|
|
543
|
-
|
|
549
|
+
isUTF16Encoding(): boolean;
|
|
544
550
|
}
|
|
545
551
|
/**
|
|
546
552
|
* A Prog is a compiled regular expression program.
|
|
@@ -587,22 +593,5 @@ declare class DFA {
|
|
|
587
593
|
match(input: any, pos: any, anchor: any): boolean | null;
|
|
588
594
|
matchSet(input: any, pos: any, anchor: any): any[] | null;
|
|
589
595
|
}
|
|
590
|
-
/**
|
|
591
|
-
* Abstract the representations of input text supplied to Matcher.
|
|
592
|
-
*/
|
|
593
|
-
declare class MatcherInputBase {
|
|
594
|
-
static Encoding: any;
|
|
595
|
-
getEncoding(): void;
|
|
596
|
-
/**
|
|
597
|
-
*
|
|
598
|
-
* @returns {boolean}
|
|
599
|
-
*/
|
|
600
|
-
isUTF8Encoding(): boolean;
|
|
601
|
-
/**
|
|
602
|
-
*
|
|
603
|
-
* @returns {boolean}
|
|
604
|
-
*/
|
|
605
|
-
isUTF16Encoding(): boolean;
|
|
606
|
-
}
|
|
607
596
|
export {};
|
|
608
597
|
//# sourceMappingURL=index.esm.d.ts.map
|
package/build/index.esm.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.esm.d.ts","sourceRoot":"","sources":["index.esm.js"],"names":[],"mappings":"AA6xCA;;;;;;;;;;;;;;;;;;;;;GAqBG;
|
|
1
|
+
{"version":3,"file":"index.esm.d.ts","sourceRoot":"","sources":["index.esm.js"],"names":[],"mappings":"AA6xCA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH;IACE;;;;;;;OAOG;IACH,6BAJW,MAAM,aACN,OAAO,GACL,MAAM,CA2BlB;IACD;;;;OAIG;IACH,qBAHW,KAAK,SACL,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,EA6BpC;IAvBC;;;OAGG;IACH,cAFU,KAAK,CAEY;IAG3B,qBAAqB;IACrB,mBADW,MAAM,CACqC;IAEtD,uBAAuB;IACvB,QADW,MAAM,EAAE,CACH;IAChB,qCAAqC;IACrC,aADW,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CACC;IAClC,qBAAqB;IACrB,sBADW,MAAM,CACqC;IAUxD;;;OAGG;IACH,WAFa,KAAK,CAIjB;IAED;;;;OAIG;IACH,SAFa,OAAO,CAiBnB;IAbC,qBAAqB;IACrB,uCAAoD;IAEpD,qBAAqB;IACrB,8BAAkB;IAElB,8BAAqB;IAGrB,+BAAsB;IAEtB,+BAAmB;IAIrB;;;;OAIG;IACH,yBAHW,gBAAgB,GACd,OAAO,CASnB;IAHC,2CAAyB;IAK3B;;;;;OAKG;IACH,cAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAYlB;IAED;;;;;OAKG;IACH,YAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAYlB;IAED;;;;;;;;;OASG;IACH,eAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,cAHW,MAAM,GAAC,MAAM,GACX,MAAM,GAAC,IAAI,CAgBvB;IAED;;;;OAIG;IACH,kBAFa,MAAM,CAAC,MAAM,EAAE,MAAM,GAAC,IAAI,CAAC,CAWvC;IAED;;;;OAIG;IACH,cAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,kBAkBC;IAED;;;;;OAKG;IACH,WAFa,OAAO,CAInB;IAED;;;;;OAKG;IACH,aAFa,OAAO,CAInB;IAED;;;;;;;OAOG;IACH,aAJW,MAAM,GAAC,IAAI,GACT,OAAO,CA4BnB;IAED;;;;;;OAMG;IACH,iBAaC;IAED;;;;;OAKG;IACH,iBAJW,MAAM,OACN,MAAM,GACJ,MAAM,CAOlB;IAED;;;OAGG;IACH,eAFa,MAAM,CAIlB;IAED;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,0BAUC;IAED;;;;OAIG;IACH,sCAgEC;IAED;;;;OAIG;IACH,oCAuGC;IAED;;;;OAIG;IACH,cAFa,MAAM,CAIlB;IAED;;;;;;;;OAQG;IACH,wBALW,MAAM,aACN,OAAO,GACL,MAAM,CAKlB;IAED;;;;;;;;OAQG;IACH,0BALW,MAAM,aACN,OAAO,GACL,MAAM,CAKlB;IAED;;;;;;;OAOG;IACH,gBAWC;CACF;AAwsMD;;;;;;;;;GASG;AACH;IACE;;OAEG;IACH,gCAAuD;IACvD;;OAEG;IACH,sBAAmC;IACnC;;;OAGG;IACH,yBAAyC;IACzC;;OAEG;IACH,sCAAmE;IACnE;;OAEG;IACH,6BAAiD;IACjD;;OAEG;IACH,2BAA6C;IAE7C;;;;;;;;;;OAUG;IACH,kBAHW,MAAM,GACJ,MAAM,CAIlB;IAED;;;;;;;;;OASG;IACH,6BAJW,MAAM,aACN,OAAO,GACL,MAAM,CAIlB;IAED;;;;;;;;;;OAUG;IACH,6BAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAIlB;IAED;;;;;OAKG;IACH,sBAJW,MAAM,UACN,MAAM,GACJ,KAAK,CA2BjB;IAED;;;;;;;OAOG;IACH,sBALW,MAAM,SACN,MAAM,GAAC
,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAKnB;IAED;;;OAGG;IACH,wBAWC;IAED;;;;OAIG;IACH,qBAHW,MAAM,SACN,MAAM,EAOhB;IAHC,qBAA2B;IAE3B,mBAAuB;IAGzB;;;OAGG;IACH,cAEC;IAED;;;OAGG;IACH,SAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,CAIlB;IACD,WAEC;IAED;;;;;OAKG;IACH,eAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAInB;IAED;;;;;OAKG;IACH,eAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAOnB;IAED;;;;;;;;OAQG;IACH,YAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAUnB;IAED;;;;;;;;OAQG;IACH,iBAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAKnB;IAED;;;;;;;;;;;;OAYG;IACH,aAJW,MAAM,UACN,MAAM,GACJ,MAAM,EAAE,CAgDpB;IAED;;;OAGG;IACH,YAFa,MAAM,CAIlB;IAED;;;;;;;;;OASG;IACH,eAFa,MAAM,CAIlB;IAED;;;;;OAKG;IACH,cAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,eAFa,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAIlC;IAED;;;;OAIG;IACH,cAHW,GAAC,GACC,OAAO,CAUnB;CACF;AAtsOD;;GAEG;AACH;CAMC;AAxDD;IACE,8BAA8B;IAC9B,qBADY,MAAM,EAIjB;CACF;AA+DD;;GAEG;AACH;CAMC;AApBD;;GAEG;AACH;CAMC;AAaD;;GAEG;AACH;CAMC;AAjFD;;GAEG;AACH;IACE;;;OAGG;IACH,mBAHW,MAAM,UACN,MAAM,GAAC,IAAI,EAcrB;IAJC,qBAAqB;IACrB,OADW,MAAM,CACC;IAClB,0BAA0B;IAC1B,OADW,MAAM,GAAC,IAAI,CACJ;IAGpB;;;OAGG;IACH,kBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,cAFa,MAAM,GAAC,IAAI,CAIvB;CACF;AAqhND;IACE,qBAAqB;IACrB,mBADW,MAAM,CACuB;IACxC,qBAAqB;IACrB,qBADW,MAAM,CAC2B;IAC5C,qBAAqB;IACrB,oBADW,MAAM,CACyB;IAE1C;;;;OAIG;IACH,qBAHW,MAAM,UACN,MAAM,EAiBhB;IAdC,eAAoB;IACpB,gBAAoB;IAQpB,iBAAwB;IACxB,eAAiB;IACjB,kBAAgB;IAChB,gBAAe;IACf;;;;;;aAAoB;IAGtB;;;;;;OAMG;IACH,aAJW,MAAM,GACJ,MAAM,CAoBlB;IAED;;;;OAIG;IACH,WAFa,IAAI,CAahB;IAED;;;;OAIG;IACH,aAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,MAAM,EAAE,CAoBpB;CACF;AA//ND;;GAEG;AACH;IACE,qBAAkD;IAClD,oBAEC;IAED;;;OAGG;IACH,kBAFa,OAAO,CAInB;IAED;;;OAGG;IACH,mBAFa,OAAO,CAInB;CACF;AA8yGD;;GAEG;AACH;IAEI,YAAc;IACd,cAAc;IAGd,eAAe;IACf,gBAAkB;IAClB,cAAc;IAKhB,sBAEC;IAGD,kBAEC;IAID,uBAEC;IAID,sBAOC;IAKD,+BAWC;IAID,oBAoBC;IAeD,8BAYC;IACD,8BAYC;IACD;;;OAGG;IACH,YAFa,MAAM,CAelB;CACF;AArxDD;IACE,gCAA4B;IAC5B,uBASC;IARC,UAAgB;IAChB,0BAA2B;IAC3B,mBAAmB;IACnB,gBAAsB;IACtB,mBAAuB;IACvB,oBA
AoB;IACpB,gBAAmB;IACnB,cAAc;IAIhB;;;;aAuCC;IAGD,wBAyDC;IACD,mBAyCC;IAGD,kDA+CC;IAGD,yDA+CC;IAGD,0DAwCC;CACF"}
|
package/build/index.esm.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.2.3
|
|
5
|
+
* @version v2.3.1
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -1329,6 +1329,7 @@ class RE2JSInternalException extends RE2JSException {
|
|
|
1329
1329
|
*
|
|
1330
1330
|
* @author rsc@google.com (Russ Cox)
|
|
1331
1331
|
*/
|
|
1332
|
+
|
|
1332
1333
|
class Matcher {
|
|
1333
1334
|
/**
|
|
1334
1335
|
* Quotes '\' and '$' in {@code s}, so that the returned string could be used in
|
|
@@ -1367,13 +1368,16 @@ class Matcher {
|
|
|
1367
1368
|
/**
|
|
1368
1369
|
*
|
|
1369
1370
|
* @param {RE2JS} pattern
|
|
1370
|
-
* @param {Utf8MatcherInput|Utf16MatcherInput|number[]|string} input
|
|
1371
|
+
* @param {string|number[]|Uint8Array} input
|
|
1371
1372
|
*/
|
|
1372
1373
|
constructor(pattern, input) {
|
|
1373
1374
|
if (pattern === null) {
|
|
1374
1375
|
throw new Error('pattern is null');
|
|
1375
1376
|
}
|
|
1376
|
-
|
|
1377
|
+
/**
|
|
1378
|
+
* The pattern being matched.
|
|
1379
|
+
* @type {RE2JS}
|
|
1380
|
+
*/
|
|
1377
1381
|
this.patternInput = pattern;
|
|
1378
1382
|
const re2 = this.patternInput.re2();
|
|
1379
1383
|
// The number of submatches (groups) in the pattern.
|
|
@@ -1427,7 +1431,7 @@ class Matcher {
|
|
|
1427
1431
|
|
|
1428
1432
|
/**
|
|
1429
1433
|
* Resets the {@code Matcher} and changes the input.
|
|
1430
|
-
* @param {
|
|
1434
|
+
* @param {MatcherInputBase} input
|
|
1431
1435
|
* @returns {Matcher} the {@code Matcher} itself, for chained method calls
|
|
1432
1436
|
*/
|
|
1433
1437
|
resetMatcherInput(input) {
|
|
@@ -1492,7 +1496,7 @@ class Matcher {
|
|
|
1492
1496
|
/**
|
|
1493
1497
|
* Returns the named group of the most recent match, or {@code null} if the group was not matched.
|
|
1494
1498
|
* @param {string|number} [group=0]
|
|
1495
|
-
* @returns {
|
|
1499
|
+
* @returns {string|null}
|
|
1496
1500
|
*/
|
|
1497
1501
|
group(group = 0) {
|
|
1498
1502
|
if (typeof group === 'string') {
|
|
@@ -1584,7 +1588,7 @@ class Matcher {
|
|
|
1584
1588
|
* Matches the input against the pattern (unanchored), starting at a specified position. If there
|
|
1585
1589
|
* is a match, {@code find} sets the match state to describe it.
|
|
1586
1590
|
*
|
|
1587
|
-
* @param {number} [start=null] the input position where the search begins
|
|
1591
|
+
* @param {number|null} [start=null] the input position where the search begins
|
|
1588
1592
|
* @returns {boolean} if it finds a match
|
|
1589
1593
|
* @throws IndexOutOfBoundsException if start is not a valid input position
|
|
1590
1594
|
*/
|
|
@@ -7935,9 +7939,18 @@ class RE2 {
|
|
|
7935
7939
|
}
|
|
7936
7940
|
|
|
7937
7941
|
class RE2Set {
|
|
7942
|
+
/** @type {number} */
|
|
7938
7943
|
static UNANCHORED = RE2Flags.UNANCHORED;
|
|
7944
|
+
/** @type {number} */
|
|
7939
7945
|
static ANCHOR_START = RE2Flags.ANCHOR_START;
|
|
7946
|
+
/** @type {number} */
|
|
7940
7947
|
static ANCHOR_BOTH = RE2Flags.ANCHOR_BOTH;
|
|
7948
|
+
|
|
7949
|
+
/**
|
|
7950
|
+
* Constructs a new RE2Set with the specified anchor mode and flags.
|
|
7951
|
+
* @param {number} [anchor=RE2Set.UNANCHORED] - The anchoring mode (e.g., RE2Set.UNANCHORED).
|
|
7952
|
+
* @param {number} [flags=0] - The public flags to apply to all patterns in the set.
|
|
7953
|
+
*/
|
|
7941
7954
|
constructor(anchor = RE2Set.UNANCHORED, flags = 0) {
|
|
7942
7955
|
this.anchor = anchor;
|
|
7943
7956
|
this.jsFlags = flags;
|
|
@@ -7954,6 +7967,14 @@ class RE2Set {
|
|
|
7954
7967
|
this.dfa = null;
|
|
7955
7968
|
this.dummyRe2 = null;
|
|
7956
7969
|
}
|
|
7970
|
+
|
|
7971
|
+
/**
|
|
7972
|
+
* Adds a new regular expression pattern to the set.
|
|
7973
|
+
* Patterns cannot be added after the set has been compiled.
|
|
7974
|
+
* @param {string} pattern - The regular expression pattern to add.
|
|
7975
|
+
* @returns {number} The integer index assigned to the added pattern.
|
|
7976
|
+
* @throws {RE2JSCompileException} If patterns are added after compilation.
|
|
7977
|
+
*/
|
|
7957
7978
|
add(pattern) {
|
|
7958
7979
|
if (this.prog) {
|
|
7959
7980
|
throw new RE2JSCompileException('Cannot add patterns after compile');
|
|
@@ -7972,6 +7993,12 @@ class RE2Set {
|
|
|
7972
7993
|
this.regexps.push(Simplify.simplify(re));
|
|
7973
7994
|
return this.regexps.length - 1;
|
|
7974
7995
|
}
|
|
7996
|
+
|
|
7997
|
+
/**
|
|
7998
|
+
* Compiles the added patterns into a single state machine.
|
|
7999
|
+
* This is automatically called on the first match if not called explicitly.
|
|
8000
|
+
* @returns {void}
|
|
8001
|
+
*/
|
|
7975
8002
|
compile() {
|
|
7976
8003
|
if (this.prog) return;
|
|
7977
8004
|
this.prog = Compiler.compileSet(this.regexps);
|
|
@@ -7984,6 +8011,12 @@ class RE2Set {
|
|
|
7984
8011
|
longest: false
|
|
7985
8012
|
};
|
|
7986
8013
|
}
|
|
8014
|
+
|
|
8015
|
+
/**
|
|
8016
|
+
* Matches the input against the compiled set of regular expressions.
|
|
8017
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to match against.
|
|
8018
|
+
* @returns {number[]} An array of indices representing the patterns that successfully matched the input.
|
|
8019
|
+
*/
|
|
7987
8020
|
match(input) {
|
|
7988
8021
|
if (!this.prog) this.compile();
|
|
7989
8022
|
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
@@ -8009,13 +8042,19 @@ class RE2Set {
|
|
|
8009
8042
|
* Transform JS regex string to RE2 regex string
|
|
8010
8043
|
*/
|
|
8011
8044
|
class TranslateRegExpString {
|
|
8012
|
-
static isUpperCaseAlpha(ch) {
|
|
8013
|
-
return 'A' <= ch && ch <= 'Z';
|
|
8014
|
-
}
|
|
8015
8045
|
static isHexadecimal(ch) {
|
|
8016
8046
|
return '0' <= ch && ch <= '9' || 'A' <= ch && ch <= 'F' || 'a' <= ch && ch <= 'f';
|
|
8017
8047
|
}
|
|
8018
8048
|
static translate(data) {
|
|
8049
|
+
let prefixFlags = '';
|
|
8050
|
+
if (data instanceof RegExp) {
|
|
8051
|
+
if (data.ignoreCase) prefixFlags += 'i';
|
|
8052
|
+
if (data.multiline) prefixFlags += 'm';
|
|
8053
|
+
if (data.dotAll) prefixFlags += 's';
|
|
8054
|
+
|
|
8055
|
+
// execution flags ('g', 'y') are safely ignored here.
|
|
8056
|
+
data = data.source;
|
|
8057
|
+
}
|
|
8019
8058
|
if (typeof data !== 'string') {
|
|
8020
8059
|
return data;
|
|
8021
8060
|
}
|
|
@@ -8026,6 +8065,7 @@ class TranslateRegExpString {
|
|
|
8026
8065
|
result = '(?:)';
|
|
8027
8066
|
changed = true;
|
|
8028
8067
|
}
|
|
8068
|
+
let inCharClass = false;
|
|
8029
8069
|
let i = 0;
|
|
8030
8070
|
while (i < size) {
|
|
8031
8071
|
let ch = data[i];
|
|
@@ -8064,10 +8104,28 @@ class TranslateRegExpString {
|
|
|
8064
8104
|
if (i + 2 < size) {
|
|
8065
8105
|
let nextCh = data[i + 2];
|
|
8066
8106
|
if (nextCh === '{') {
|
|
8067
|
-
|
|
8068
|
-
i
|
|
8069
|
-
|
|
8070
|
-
|
|
8107
|
+
// Must have a closing brace and at least one valid hex digit inside
|
|
8108
|
+
let j = i + 3;
|
|
8109
|
+
let hasHex = false;
|
|
8110
|
+
let closed = false;
|
|
8111
|
+
while (j < size) {
|
|
8112
|
+
const hexChar = data[j];
|
|
8113
|
+
if (hexChar === '}') {
|
|
8114
|
+
closed = true;
|
|
8115
|
+
break;
|
|
8116
|
+
}
|
|
8117
|
+
if (!TranslateRegExpString.isHexadecimal(hexChar)) {
|
|
8118
|
+
break;
|
|
8119
|
+
}
|
|
8120
|
+
hasHex = true;
|
|
8121
|
+
j++;
|
|
8122
|
+
}
|
|
8123
|
+
if (closed && hasHex) {
|
|
8124
|
+
result += '\\x';
|
|
8125
|
+
i += 2;
|
|
8126
|
+
changed = true;
|
|
8127
|
+
continue;
|
|
8128
|
+
}
|
|
8071
8129
|
} else if (i + 5 < size) {
|
|
8072
8130
|
let isHex4 = true;
|
|
8073
8131
|
for (let j = 0; j < 4; j++) {
|
|
@@ -8084,18 +8142,101 @@ class TranslateRegExpString {
|
|
|
8084
8142
|
}
|
|
8085
8143
|
}
|
|
8086
8144
|
}
|
|
8145
|
+
|
|
8146
|
+
// Graceful degradation for invalid/unclosed \u sequences
|
|
8087
8147
|
result += 'u';
|
|
8088
8148
|
i += 2;
|
|
8089
8149
|
changed = true;
|
|
8090
8150
|
continue;
|
|
8091
8151
|
}
|
|
8152
|
+
case 'x':
|
|
8153
|
+
{
|
|
8154
|
+
let isValidHex = false;
|
|
8155
|
+
if (i + 2 < size && data[i + 2] === '{') {
|
|
8156
|
+
// Must have a closing brace and at least one valid hex digit inside
|
|
8157
|
+
let j = i + 3;
|
|
8158
|
+
let hasHex = false;
|
|
8159
|
+
let closed = false;
|
|
8160
|
+
while (j < size) {
|
|
8161
|
+
const hexChar = data[j];
|
|
8162
|
+
if (hexChar === '}') {
|
|
8163
|
+
closed = true;
|
|
8164
|
+
break;
|
|
8165
|
+
}
|
|
8166
|
+
if (!TranslateRegExpString.isHexadecimal(hexChar)) {
|
|
8167
|
+
break;
|
|
8168
|
+
}
|
|
8169
|
+
hasHex = true;
|
|
8170
|
+
j++;
|
|
8171
|
+
}
|
|
8172
|
+
if (closed && hasHex) {
|
|
8173
|
+
isValidHex = true;
|
|
8174
|
+
}
|
|
8175
|
+
} else if (i + 3 < size && TranslateRegExpString.isHexadecimal(data[i + 2]) && TranslateRegExpString.isHexadecimal(data[i + 3])) {
|
|
8176
|
+
isValidHex = true;
|
|
8177
|
+
}
|
|
8178
|
+
if (isValidHex) {
|
|
8179
|
+
result += '\\x';
|
|
8180
|
+
i += 2;
|
|
8181
|
+
} else {
|
|
8182
|
+
result += 'x';
|
|
8183
|
+
i += 2;
|
|
8184
|
+
changed = true;
|
|
8185
|
+
}
|
|
8186
|
+
continue;
|
|
8187
|
+
}
|
|
8188
|
+
// Whitelist of valid RE2/JS alphanumeric escapes
|
|
8189
|
+
case 'n':
|
|
8190
|
+
case 'r':
|
|
8191
|
+
case 't':
|
|
8192
|
+
case 'a':
|
|
8193
|
+
case 'f':
|
|
8194
|
+
case 'v':
|
|
8195
|
+
case 'd':
|
|
8196
|
+
case 'D':
|
|
8197
|
+
case 's':
|
|
8198
|
+
case 'S':
|
|
8199
|
+
case 'w':
|
|
8200
|
+
case 'W':
|
|
8201
|
+
case 'b':
|
|
8202
|
+
case 'B':
|
|
8203
|
+
case 'p':
|
|
8204
|
+
case 'P':
|
|
8205
|
+
case 'A':
|
|
8206
|
+
case 'z':
|
|
8207
|
+
case 'Q':
|
|
8208
|
+
case 'E':
|
|
8209
|
+
case '0':
|
|
8210
|
+
case '1':
|
|
8211
|
+
case '2':
|
|
8212
|
+
case '3':
|
|
8213
|
+
case '4':
|
|
8214
|
+
case '5':
|
|
8215
|
+
case '6':
|
|
8216
|
+
case '7':
|
|
8217
|
+
{
|
|
8218
|
+
result += '\\' + ch;
|
|
8219
|
+
i += 2;
|
|
8220
|
+
continue;
|
|
8221
|
+
}
|
|
8092
8222
|
default:
|
|
8093
8223
|
{
|
|
8094
|
-
result += '\\';
|
|
8095
8224
|
let cp = data.codePointAt(i + 1);
|
|
8096
|
-
let
|
|
8097
|
-
|
|
8098
|
-
|
|
8225
|
+
let isAlphaNum = cp >= 48 && cp <= 57 || cp >= 65 && cp <= 90 || cp >= 97 && cp <= 122;
|
|
8226
|
+
if (isAlphaNum) {
|
|
8227
|
+
// Invalid JS alphanumeric escape sequence (e.g. \8, \9, \e, \K)
|
|
8228
|
+
// Gracefully degrade to the literal character to prevent RE2 syntax crashes
|
|
8229
|
+
let symSize = Utils.charCount(cp);
|
|
8230
|
+
result += data.substring(i + 1, i + 1 + symSize);
|
|
8231
|
+
i += symSize + 1;
|
|
8232
|
+
changed = true;
|
|
8233
|
+
} else {
|
|
8234
|
+
// Escaped symbol (e.g. \., \*, \])
|
|
8235
|
+
result += '\\';
|
|
8236
|
+
let symSize = Utils.charCount(cp);
|
|
8237
|
+
result += data.substring(i + 1, i + 1 + symSize);
|
|
8238
|
+
i += symSize + 1;
|
|
8239
|
+
}
|
|
8099
8240
|
continue;
|
|
8100
8241
|
}
|
|
8101
8242
|
}
|
|
@@ -8105,7 +8246,13 @@ class TranslateRegExpString {
|
|
|
8105
8246
|
i += 1;
|
|
8106
8247
|
changed = true;
|
|
8107
8248
|
continue;
|
|
8108
|
-
} else if (ch === '
|
|
8249
|
+
} else if (ch === '[') {
|
|
8250
|
+
// Track entry into a character class (protects syntax inside)
|
|
8251
|
+
inCharClass = true;
|
|
8252
|
+
} else if (ch === ']') {
|
|
8253
|
+
// Track exit of a character class
|
|
8254
|
+
inCharClass = false;
|
|
8255
|
+
} else if (!inCharClass && ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
|
|
8109
8256
|
if (i + 3 < size && !'=!>)'.includes(data[i + 3])) {
|
|
8110
8257
|
result += '(?P<';
|
|
8111
8258
|
i += 3;
|
|
@@ -8118,7 +8265,13 @@ class TranslateRegExpString {
|
|
|
8118
8265
|
result += data.substring(i, i + symSize);
|
|
8119
8266
|
i += symSize;
|
|
8120
8267
|
}
|
|
8121
|
-
|
|
8268
|
+
const finalResult = changed ? result : data;
|
|
8269
|
+
|
|
8270
|
+
// Prepend any extracted flags as a leading inline-flag group, e.g. (?im)
|
|
8271
|
+
if (prefixFlags.length > 0) {
|
|
8272
|
+
return `(?${prefixFlags})${finalResult}`;
|
|
8273
|
+
}
|
|
8274
|
+
return finalResult;
|
|
8122
8275
|
}
|
|
8123
8276
|
}
|
|
8124
8277
|
|
|
@@ -8196,7 +8349,7 @@ class RE2JS {
|
|
|
8196
8349
|
* RE2JS-compatible syntax, and handling Unicode sequences properly. It ensures that the
|
|
8197
8350
|
* resulting regex is safe and properly formatted before compilation.
|
|
8198
8351
|
*
|
|
8199
|
-
* @param {string} expr - The regular expression string to be translated.
|
|
8352
|
+
* @param {string|RegExp} expr - The regular expression string or RegExp object to be translated.
|
|
8200
8353
|
* @returns {string} - The transformed regular expression string, ready for compilation.
|
|
8201
8354
|
*/
|
|
8202
8355
|
static translateRegExp(expr) {
|
|
@@ -8240,7 +8393,7 @@ class RE2JS {
|
|
|
8240
8393
|
* Matches a string against a regular expression.
|
|
8241
8394
|
*
|
|
8242
8395
|
* @param {string} regex the regular expression
|
|
8243
|
-
* @param {string|number[]} input the input
|
|
8396
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
8244
8397
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
8245
8398
|
* @throws RE2JSSyntaxException if the regular expression is malformed
|
|
8246
8399
|
*/
|
|
@@ -8307,7 +8460,7 @@ class RE2JS {
|
|
|
8307
8460
|
/**
|
|
8308
8461
|
* Matches a string against a regular expression.
|
|
8309
8462
|
*
|
|
8310
|
-
* @param {string|number[]} input the input
|
|
8463
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
8311
8464
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
8312
8465
|
*/
|
|
8313
8466
|
matches(input) {
|
|
@@ -8317,7 +8470,7 @@ class RE2JS {
|
|
|
8317
8470
|
/**
|
|
8318
8471
|
* Creates a new {@code Matcher} matching the pattern against the input.
|
|
8319
8472
|
*
|
|
8320
|
-
* @param {string|number[]} input the input string
|
|
8473
|
+
* @param {string|number[]|Uint8Array} input the input string
|
|
8321
8474
|
* @returns {Matcher}
|
|
8322
8475
|
*/
|
|
8323
8476
|
matcher(input) {
|
|
@@ -8333,7 +8486,7 @@ class RE2JS {
|
|
|
8333
8486
|
* a boolean and does not extract capture groups, it bypasses the `Matcher` overhead
|
|
8334
8487
|
* and guarantees execution on the high-speed DFA engine whenever possible.
|
|
8335
8488
|
*
|
|
8336
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
8489
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
8337
8490
|
* @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
|
|
8338
8491
|
*/
|
|
8339
8492
|
test(input) {
|
|
@@ -8352,7 +8505,7 @@ class RE2JS {
|
|
|
8352
8505
|
* faster because it does not request capture group data. By requesting 0 capture groups,
|
|
8353
8506
|
* it securely routes execution through the DFA fast-path.
|
|
8354
8507
|
*
|
|
8355
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
8508
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
8356
8509
|
* @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
|
|
8357
8510
|
*/
|
|
8358
8511
|
testExact(input) {
|