re2js 2.2.3 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/build/index.cjs.cjs +184 -27
- package/build/index.cjs.cjs.map +1 -1
- package/build/index.esm.d.ts +54 -78
- package/build/index.esm.d.ts.map +1 -1
- package/build/index.esm.js +184 -27
- package/build/index.esm.js.map +1 -1
- package/build/index.umd.js +184 -27
- package/build/index.umd.js.map +1 -1
- package/package.json +1 -1
package/build/index.esm.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
export type RE2JS_Pattern = any;
|
|
1
2
|
/**
|
|
2
3
|
* A stateful iterator that interprets a regex {@code RE2JS} on a specific input.
|
|
3
4
|
*
|
|
@@ -20,6 +21,9 @@
|
|
|
20
21
|
*
|
|
21
22
|
* @author rsc@google.com (Russ Cox)
|
|
22
23
|
*/
|
|
24
|
+
/**
|
|
25
|
+
* @typedef {import('./index').RE2JS} RE2JS_Pattern
|
|
26
|
+
*/
|
|
23
27
|
export class Matcher {
|
|
24
28
|
/**
|
|
25
29
|
* Quotes '\' and '$' in {@code s}, so that the returned string could be used in
|
|
@@ -32,11 +36,15 @@ export class Matcher {
|
|
|
32
36
|
static quoteReplacement(str: string, javaMode?: boolean): string;
|
|
33
37
|
/**
|
|
34
38
|
*
|
|
35
|
-
* @param {
|
|
36
|
-
* @param {
|
|
39
|
+
* @param {RE2JS_Pattern} pattern
|
|
40
|
+
* @param {Uint8Array|number[]|string} input
|
|
41
|
+
*/
|
|
42
|
+
constructor(pattern: RE2JS_Pattern, input: Uint8Array | number[] | string);
|
|
43
|
+
/**
|
|
44
|
+
* The pattern being matched.
|
|
45
|
+
* @type {RE2JS_Pattern}
|
|
37
46
|
*/
|
|
38
|
-
|
|
39
|
-
patternInput: RE2JS;
|
|
47
|
+
patternInput: RE2JS_Pattern;
|
|
40
48
|
/** @type {number} */
|
|
41
49
|
patternGroupCount: number;
|
|
42
50
|
/** @type {number[]} */
|
|
@@ -47,9 +55,9 @@ export class Matcher {
|
|
|
47
55
|
numberOfInstructions: number;
|
|
48
56
|
/**
|
|
49
57
|
* Returns the {@code RE2JS} associated with this {@code Matcher}.
|
|
50
|
-
* @returns {
|
|
58
|
+
* @returns {RE2JS_Pattern}
|
|
51
59
|
*/
|
|
52
|
-
pattern():
|
|
60
|
+
pattern(): RE2JS_Pattern;
|
|
53
61
|
/**
|
|
54
62
|
* Resets the {@code Matcher}, rewinding input and discarding any match information.
|
|
55
63
|
*
|
|
@@ -65,11 +73,11 @@ export class Matcher {
|
|
|
65
73
|
anchorFlag: number | undefined;
|
|
66
74
|
/**
|
|
67
75
|
* Resets the {@code Matcher} and changes the input.
|
|
68
|
-
* @param {
|
|
76
|
+
* @param {import('./MatcherInput').MatcherInputBase} input
|
|
69
77
|
* @returns {Matcher} the {@code Matcher} itself, for chained method calls
|
|
70
78
|
*/
|
|
71
|
-
resetMatcherInput(input:
|
|
72
|
-
matcherInput:
|
|
79
|
+
resetMatcherInput(input: any): Matcher;
|
|
80
|
+
matcherInput: any;
|
|
73
81
|
/**
|
|
74
82
|
* Returns the start of the named group of the most recent match, or -1 if the group was not
|
|
75
83
|
* matched.
|
|
@@ -98,7 +106,7 @@ export class Matcher {
|
|
|
98
106
|
/**
|
|
99
107
|
* Returns the named group of the most recent match, or {@code null} if the group was not matched.
|
|
100
108
|
* @param {string|number} [group=0]
|
|
101
|
-
* @returns {
|
|
109
|
+
* @returns {string|null}
|
|
102
110
|
*/
|
|
103
111
|
group(group?: string | number): string | null;
|
|
104
112
|
/**
|
|
@@ -137,11 +145,11 @@ export class Matcher {
|
|
|
137
145
|
* Matches the input against the pattern (unanchored), starting at a specified position. If there
|
|
138
146
|
* is a match, {@code find} sets the match state to describe it.
|
|
139
147
|
*
|
|
140
|
-
* @param {number} [start=null] the input position where the search begins
|
|
148
|
+
* @param {number|null} [start=null] the input position where the search begins
|
|
141
149
|
* @returns {boolean} if it finds a match
|
|
142
150
|
* @throws IndexOutOfBoundsException if start is not a valid input position
|
|
143
151
|
*/
|
|
144
|
-
find(start?: number): boolean;
|
|
152
|
+
find(start?: number | null): boolean;
|
|
145
153
|
/**
|
|
146
154
|
* Helper: does match starting at start, with RE2 anchor flag.
|
|
147
155
|
* @param {number} startByte
|
|
@@ -299,10 +307,10 @@ export class RE2JS {
|
|
|
299
307
|
* RE2JS-compatible syntax, and handling Unicode sequences properly. It ensures that the
|
|
300
308
|
* resulting regex is safe and properly formatted before compilation.
|
|
301
309
|
*
|
|
302
|
-
* @param {string} expr - The regular expression string to be translated.
|
|
310
|
+
* @param {string|RegExp} expr - The regular expression string to be translated.
|
|
303
311
|
* @returns {string} - The transformed regular expression string, ready for compilation.
|
|
304
312
|
*/
|
|
305
|
-
static translateRegExp(expr: string): string;
|
|
313
|
+
static translateRegExp(expr: string | RegExp): string;
|
|
306
314
|
/**
|
|
307
315
|
* Helper: create new RE2JS with given regex and flags. Flregex is the regex with flags applied.
|
|
308
316
|
* @param {string} regex
|
|
@@ -314,11 +322,11 @@ export class RE2JS {
|
|
|
314
322
|
* Matches a string against a regular expression.
|
|
315
323
|
*
|
|
316
324
|
* @param {string} regex the regular expression
|
|
317
|
-
* @param {string|number[]} input the input
|
|
325
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
318
326
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
319
327
|
* @throws RE2JSSyntaxException if the regular expression is malformed
|
|
320
328
|
*/
|
|
321
|
-
static matches(regex: string, input: string | number[]): boolean;
|
|
329
|
+
static matches(regex: string, input: string | number[] | Uint8Array): boolean;
|
|
322
330
|
/**
|
|
323
331
|
* This is visible for testing.
|
|
324
332
|
* @private
|
|
@@ -351,37 +359,37 @@ export class RE2JS {
|
|
|
351
359
|
/**
|
|
352
360
|
* Matches a string against a regular expression.
|
|
353
361
|
*
|
|
354
|
-
* @param {string|number[]} input the input
|
|
362
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
355
363
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
356
364
|
*/
|
|
357
|
-
matches(input: string | number[]): boolean;
|
|
365
|
+
matches(input: string | number[] | Uint8Array): boolean;
|
|
358
366
|
/**
|
|
359
367
|
* Creates a new {@code Matcher} matching the pattern against the input.
|
|
360
368
|
*
|
|
361
|
-
* @param {string|number[]} input the input string
|
|
369
|
+
* @param {string|number[]|Uint8Array} input the input string
|
|
362
370
|
* @returns {Matcher}
|
|
363
371
|
*/
|
|
364
|
-
matcher(input: string | number[]): Matcher;
|
|
372
|
+
matcher(input: string | number[] | Uint8Array): Matcher;
|
|
365
373
|
/**
|
|
366
374
|
* Tests whether the regular expression matches any part of the input string.
|
|
367
375
|
* Performance Note: This method is highly optimized. Because it only returns
|
|
368
376
|
* a boolean and does not extract capture groups, it bypasses the `Matcher` overhead
|
|
369
377
|
* and guarantees execution on the high-speed DFA engine whenever possible.
|
|
370
378
|
*
|
|
371
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
379
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
372
380
|
* @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
|
|
373
381
|
*/
|
|
374
|
-
test(input: string | number[]): boolean;
|
|
382
|
+
test(input: string | number[] | Uint8Array): boolean;
|
|
375
383
|
/**
|
|
376
384
|
* Tests whether the regular expression matches the ENTIRE input string.
|
|
377
385
|
* * **Performance Note:** This operates identically to `.matches()`, but is significantly
|
|
378
386
|
* faster because it does not request capture group data. By requesting 0 capture groups,
|
|
379
387
|
* it securely routes execution through the DFA fast-path.
|
|
380
388
|
*
|
|
381
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
389
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
382
390
|
* @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
|
|
383
391
|
*/
|
|
384
|
-
testExact(input: string | number[]): boolean;
|
|
392
|
+
testExact(input: string | number[] | Uint8Array): boolean;
|
|
385
393
|
/**
|
|
386
394
|
* Splits input around instances of the regular expression. It returns an array giving the strings
|
|
387
395
|
* that occur before, between, and after instances of the regular expression.
|
|
@@ -481,9 +489,17 @@ export class RE2JSSyntaxException extends RE2JSException {
|
|
|
481
489
|
getPattern(): string | null;
|
|
482
490
|
}
|
|
483
491
|
export class RE2Set {
|
|
492
|
+
/** @type {number} */
|
|
484
493
|
static UNANCHORED: number;
|
|
494
|
+
/** @type {number} */
|
|
485
495
|
static ANCHOR_START: number;
|
|
496
|
+
/** @type {number} */
|
|
486
497
|
static ANCHOR_BOTH: number;
|
|
498
|
+
/**
|
|
499
|
+
* Constructs a new RE2Set with the specified anchor mode and flags.
|
|
500
|
+
* @param {number} [anchor=RE2Set.UNANCHORED] - The anchoring mode (e.g., RE2Set.UNANCHORED).
|
|
501
|
+
* @param {number} [flags=0] - The public flags to apply to all patterns in the set.
|
|
502
|
+
*/
|
|
487
503
|
constructor(anchor?: number, flags?: number);
|
|
488
504
|
anchor: number;
|
|
489
505
|
jsFlags: number;
|
|
@@ -498,49 +514,26 @@ export class RE2Set {
|
|
|
498
514
|
prefixRune: number;
|
|
499
515
|
longest: boolean;
|
|
500
516
|
} | null;
|
|
501
|
-
add(pattern: any): number;
|
|
502
|
-
compile(): void;
|
|
503
|
-
match(input: any): any[];
|
|
504
|
-
}
|
|
505
|
-
declare class Utf8MatcherInput extends MatcherInputBase {
|
|
506
|
-
constructor(bytes?: null);
|
|
507
|
-
bytes: any;
|
|
508
|
-
getEncoding(): any;
|
|
509
|
-
/**
|
|
510
|
-
*
|
|
511
|
-
* @returns {string}
|
|
512
|
-
*/
|
|
513
|
-
asCharSequence(): string;
|
|
514
|
-
/**
|
|
515
|
-
*
|
|
516
|
-
* @returns {number[]}
|
|
517
|
-
*/
|
|
518
|
-
asBytes(): number[];
|
|
519
|
-
/**
|
|
520
|
-
*
|
|
521
|
-
* @returns {number}
|
|
522
|
-
*/
|
|
523
|
-
length(): number;
|
|
524
|
-
}
|
|
525
|
-
declare class Utf16MatcherInput extends MatcherInputBase {
|
|
526
|
-
constructor(charSequence?: null);
|
|
527
|
-
charSequence: any;
|
|
528
|
-
getEncoding(): any;
|
|
529
517
|
/**
|
|
530
|
-
*
|
|
531
|
-
*
|
|
518
|
+
* Adds a new regular expression pattern to the set.
|
|
519
|
+
* Patterns cannot be added after the set has been compiled.
|
|
520
|
+
* @param {string} pattern - The regular expression pattern to add.
|
|
521
|
+
* @returns {number} The integer index assigned to the added pattern.
|
|
522
|
+
* @throws {RE2JSCompileException} If patterns are added after compilation.
|
|
532
523
|
*/
|
|
533
|
-
|
|
524
|
+
add(pattern: string): number;
|
|
534
525
|
/**
|
|
535
|
-
*
|
|
536
|
-
*
|
|
526
|
+
* Compiles the added patterns into a single state machine.
|
|
527
|
+
* This is automatically called on the first match if not called explicitly.
|
|
528
|
+
* @returns {void}
|
|
537
529
|
*/
|
|
538
|
-
|
|
530
|
+
compile(): void;
|
|
539
531
|
/**
|
|
540
|
-
*
|
|
541
|
-
* @
|
|
532
|
+
* Matches the input against the compiled set of regular expressions.
|
|
533
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to match against.
|
|
534
|
+
* @returns {number[]} An array of indices representing the patterns that successfully matched the input.
|
|
542
535
|
*/
|
|
543
|
-
|
|
536
|
+
match(input: string | number[] | Uint8Array): number[];
|
|
544
537
|
}
|
|
545
538
|
/**
|
|
546
539
|
* A Prog is a compiled regular expression program.
|
|
@@ -587,22 +580,5 @@ declare class DFA {
|
|
|
587
580
|
match(input: any, pos: any, anchor: any): boolean | null;
|
|
588
581
|
matchSet(input: any, pos: any, anchor: any): any[] | null;
|
|
589
582
|
}
|
|
590
|
-
/**
|
|
591
|
-
* Abstract the representations of input text supplied to Matcher.
|
|
592
|
-
*/
|
|
593
|
-
declare class MatcherInputBase {
|
|
594
|
-
static Encoding: any;
|
|
595
|
-
getEncoding(): void;
|
|
596
|
-
/**
|
|
597
|
-
*
|
|
598
|
-
* @returns {boolean}
|
|
599
|
-
*/
|
|
600
|
-
isUTF8Encoding(): boolean;
|
|
601
|
-
/**
|
|
602
|
-
*
|
|
603
|
-
* @returns {boolean}
|
|
604
|
-
*/
|
|
605
|
-
isUTF16Encoding(): boolean;
|
|
606
|
-
}
|
|
607
583
|
export {};
|
|
608
584
|
//# sourceMappingURL=index.esm.d.ts.map
|
package/build/index.esm.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.esm.d.ts","sourceRoot":"","sources":["index.esm.js"],"names":[],"mappings":"AA6xCA;;;;;;;;;;;;;;;;;;;;;GAqBG;
|
|
1
|
+
{"version":3,"file":"index.esm.d.ts","sourceRoot":"","sources":["index.esm.js"],"names":[],"mappings":";AA6xCA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH;;GAEG;AAEH;IACE;;;;;;;OAOG;IACH,6BAJW,MAAM,aACN,OAAO,GACL,MAAM,CA2BlB;IACD;;;;OAIG;IACH,qBAHW,aAAa,SACb,UAAU,GAAC,MAAM,EAAE,GAAC,MAAM,EA6BpC;IAvBC;;;OAGG;IACH,cAFU,aAAa,CAEI;IAG3B,qBAAqB;IACrB,mBADW,MAAM,CACqC;IAEtD,uBAAuB;IACvB,QADW,MAAM,EAAE,CACH;IAChB,qCAAqC;IACrC,aADW,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CACC;IAClC,qBAAqB;IACrB,sBADW,MAAM,CACqC;IAUxD;;;OAGG;IACH,WAFa,aAAa,CAIzB;IAED;;;;OAIG;IACH,SAFa,OAAO,CAiBnB;IAbC,qBAAqB;IACrB,uCAAoD;IAEpD,qBAAqB;IACrB,8BAAkB;IAElB,8BAAqB;IAGrB,+BAAsB;IAEtB,+BAAmB;IAIrB;;;;OAIG;IACH,+BAFa,OAAO,CASnB;IAHC,kBAAyB;IAK3B;;;;;OAKG;IACH,cAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAYlB;IAED;;;;;OAKG;IACH,YAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAYlB;IAED;;;;;;;;;OASG;IACH,eAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,cAHW,MAAM,GAAC,MAAM,GACX,MAAM,GAAC,IAAI,CAgBvB;IAED;;;;OAIG;IACH,kBAFa,MAAM,CAAC,MAAM,EAAE,MAAM,GAAC,IAAI,CAAC,CAWvC;IAED;;;;OAIG;IACH,cAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,kBAkBC;IAED;;;;;OAKG;IACH,WAFa,OAAO,CAInB;IAED;;;;;OAKG;IACH,aAFa,OAAO,CAInB;IAED;;;;;;;OAOG;IACH,aAJW,MAAM,GAAC,IAAI,GACT,OAAO,CA4BnB;IAED;;;;;;OAMG;IACH,iBAaC;IAED;;;;;OAKG;IACH,iBAJW,MAAM,OACN,MAAM,GACJ,MAAM,CAOlB;IAED;;;OAGG;IACH,eAFa,MAAM,CAIlB;IAED;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,0BAUC;IAED;;;;OAIG;IACH,sCAgEC;IAED;;;;OAIG;IACH,oCAuGC;IAED;;;;OAIG;IACH,cAFa,MAAM,CAIlB;IAED;;;;;;;;OAQG;IACH,wBALW,MAAM,aACN,OAAO,GACL,MAAM,CAKlB;IAED;;;;;;;;OAQG;IACH,0BALW,MAAM,aACN,OAAO,GACL,MAAM,CAKlB;IAED;;;;;;;OAOG;IACH,gBAWC;CACF;AAwsMD;;;;;;;;;GASG;AACH;IACE;;OAEG;IACH,gCAAuD;IACvD;;OAEG;IACH,sBAAmC;IACnC;;;OAGG;IACH,yBAAyC;IACzC;;OAEG;IACH,sCAAmE;IACnE;;OAEG;IACH,6BAAiD;IACjD;;OAEG;IACH,2BAA6C;IAE7C;;;;;;;;;;OAUG;IACH,kBAHW,MAAM,GACJ,MAAM,CAIlB;IAED;;;;;;;;;OASG;IACH,6BAJW,MAAM,aACN,OAAO,GACL,MAAM,CAIlB;IAED;;;;;;;;;;OAUG;IACH,6BAHW,MAAM,GAAC,MAAM,GACX,MAAM,CAIlB;IAED;;;;;OAKG;IACH,sBAJW,MAAM,UACN,MAAM,GACJ,KAAK,CA2BjB;IAED;;;;;;;OAOG;IACH,sBALW,MAAM,SACN,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAKnB;IAED;;;OAGG;IACH,wBAWC;IAED;;;;OAIG;IACH,qBAHW,MAAM,SACN,MAAM,EAOhB;IAHC,qBAA2B;IAE3B,mBAAuB;IAGzB;;;OAGG;IACH,cAEC;IAED;;;OAGG;IACH,SAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,MAAM,CAIlB;IACD,WAEC;IAED;;;;;OAKG;IACH,eAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAInB;IAED;;;;;OAKG;IACH,eAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAOnB;IAED;;;;;;;;OAQG;IACH,YAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAUnB;IAED;;;;;;;;OAQG;IACH,iBAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,OAAO,CAKnB;IAED;;;;;;;;;;;;OAYG;IACH,aAJW,MAAM,UACN,MAAM,GACJ,MAAM,EAAE,CAgDpB;IAED;;;OAGG;IACH,YAFa,MAAM,CAIlB;IAED;;;;;;;;;OASG;IACH,eAFa,MAAM,CAIlB;IAED;;;;;OAKG;IACH,cAFa,MAAM,CAIlB;IAED;;;;OAIG;IACH,eAFa,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAIlC;IAED;;;;OAIG;IACH,cAHW,GAAC,GACC,OAAO,CAUnB;CACF;AA1sOD;;GAEG;AACH;CAMC;AAxDD;IACE,8BAA8B;IAC9B,qBADY,MAAM,EAIjB;CACF;AA+DD;;GAEG;AACH;CAMC;AApBD;;GAEG;AACH;CAMC;AAaD;;GAEG;AACH;CAMC;AAjFD;;GAEG;AACH;IACE;;;OAGG;IACH,mBAHW,MAAM,UACN,MAAM,GAAC,IAAI,EAcrB;IAJC,qBAAqB;IACrB,OADW,MAAM,CACC;IAClB,0BAA0B;IAC1B,OADW,MAAM,GAAC,IAAI,CACJ;IAGpB;;;OAGG;IACH,kBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,cAFa,MAAM,GAAC,IAAI,CAIvB;CACF;AAyhND;IACE,qBAAqB;IACrB,mBADW,MAAM,CACuB;IACxC,qBAAqB;IACrB,qBADW,MAAM,CAC2B;IAC5C,qBAAqB;IACrB,oBADW,MAAM,CACyB;IAE1C;;;;OAIG;IACH,qBAHW,MAAM,UACN,MAAM,EAiBhB;IAdC,eAAoB;IACpB,gBAAoB;IAQpB,iBAAwB;IACxB,eAAiB;IACjB,kBAAgB;IAChB,gBAAe;IACf;;;;;;aAAoB;IAGtB;;;;;;OAMG;IACH,aAJW,MAAM,GACJ,MAAM,CAoBlB;IAED;;;;OAIG;IACH,WAFa,IAAI,CAahB;IAED;;;;OAIG;IACH,aAHW,MAAM,GAAC,MAAM,EAAE,GAAC,UAAU,GACxB,MAAM,EAAE,CAoBpB;CACF;AAzrHD;;GAEG;AACH;IAEI,YAAc;IACd,cAAc;IAGd,eAAe;IACf,gBAAkB;IAClB,cAAc;IAKhB,sBAEC;IAGD,kBAEC;IAID,uBAEC;IAID,sBAOC;IAKD,+BAWC;IAID,oBAoBC;IAeD,8BAYC;IACD,8BAYC;IACD;;;OAGG;IACH,YAFa,MAAM,CAelB;CACF;AArxDD;IACE,gCAA4B;IAC5B,uBASC;IARC,UAAgB;IAChB,0BAA2B;IAC3B,mBAAmB;IACnB,gBAAsB;IACtB,mBAAuB;IACvB,oBAAoB;IACpB,gBAAmB;IACnB,cAAc;IAIhB;;;;aAuCC;IAGD,wBAyDC;IACD,mBAyCC;IAGD,kDA+CC;IAGD,yDA+CC;IAGD,0DAwCC;CACF"}
|
package/build/index.esm.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* re2js
|
|
3
3
|
* RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
|
|
4
4
|
*
|
|
5
|
-
* @version v2.
|
|
5
|
+
* @version v2.3.0
|
|
6
6
|
* @author Oleksii Vasyliev
|
|
7
7
|
* @homepage https://github.com/le0pard/re2js#readme
|
|
8
8
|
* @repository github:le0pard/re2js
|
|
@@ -1329,6 +1329,11 @@ class RE2JSInternalException extends RE2JSException {
|
|
|
1329
1329
|
*
|
|
1330
1330
|
* @author rsc@google.com (Russ Cox)
|
|
1331
1331
|
*/
|
|
1332
|
+
|
|
1333
|
+
/**
|
|
1334
|
+
* @typedef {import('./index').RE2JS} RE2JS_Pattern
|
|
1335
|
+
*/
|
|
1336
|
+
|
|
1332
1337
|
class Matcher {
|
|
1333
1338
|
/**
|
|
1334
1339
|
* Quotes '\' and '$' in {@code s}, so that the returned string could be used in
|
|
@@ -1366,14 +1371,17 @@ class Matcher {
|
|
|
1366
1371
|
}
|
|
1367
1372
|
/**
|
|
1368
1373
|
*
|
|
1369
|
-
* @param {
|
|
1370
|
-
* @param {
|
|
1374
|
+
* @param {RE2JS_Pattern} pattern
|
|
1375
|
+
* @param {Uint8Array|number[]|string} input
|
|
1371
1376
|
*/
|
|
1372
1377
|
constructor(pattern, input) {
|
|
1373
1378
|
if (pattern === null) {
|
|
1374
1379
|
throw new Error('pattern is null');
|
|
1375
1380
|
}
|
|
1376
|
-
|
|
1381
|
+
/**
|
|
1382
|
+
* The pattern being matched.
|
|
1383
|
+
* @type {RE2JS_Pattern}
|
|
1384
|
+
*/
|
|
1377
1385
|
this.patternInput = pattern;
|
|
1378
1386
|
const re2 = this.patternInput.re2();
|
|
1379
1387
|
// The number of submatches (groups) in the pattern.
|
|
@@ -1397,7 +1405,7 @@ class Matcher {
|
|
|
1397
1405
|
|
|
1398
1406
|
/**
|
|
1399
1407
|
* Returns the {@code RE2JS} associated with this {@code Matcher}.
|
|
1400
|
-
* @returns {
|
|
1408
|
+
* @returns {RE2JS_Pattern}
|
|
1401
1409
|
*/
|
|
1402
1410
|
pattern() {
|
|
1403
1411
|
return this.patternInput;
|
|
@@ -1427,7 +1435,7 @@ class Matcher {
|
|
|
1427
1435
|
|
|
1428
1436
|
/**
|
|
1429
1437
|
* Resets the {@code Matcher} and changes the input.
|
|
1430
|
-
* @param {
|
|
1438
|
+
* @param {import('./MatcherInput').MatcherInputBase} input
|
|
1431
1439
|
* @returns {Matcher} the {@code Matcher} itself, for chained method calls
|
|
1432
1440
|
*/
|
|
1433
1441
|
resetMatcherInput(input) {
|
|
@@ -1492,7 +1500,7 @@ class Matcher {
|
|
|
1492
1500
|
/**
|
|
1493
1501
|
* Returns the named group of the most recent match, or {@code null} if the group was not matched.
|
|
1494
1502
|
* @param {string|number} [group=0]
|
|
1495
|
-
* @returns {
|
|
1503
|
+
* @returns {string|null}
|
|
1496
1504
|
*/
|
|
1497
1505
|
group(group = 0) {
|
|
1498
1506
|
if (typeof group === 'string') {
|
|
@@ -1584,7 +1592,7 @@ class Matcher {
|
|
|
1584
1592
|
* Matches the input against the pattern (unanchored), starting at a specified position. If there
|
|
1585
1593
|
* is a match, {@code find} sets the match state to describe it.
|
|
1586
1594
|
*
|
|
1587
|
-
* @param {number} [start=null] the input position where the search begins
|
|
1595
|
+
* @param {number|null} [start=null] the input position where the search begins
|
|
1588
1596
|
* @returns {boolean} if it finds a match
|
|
1589
1597
|
* @throws IndexOutOfBoundsException if start is not a valid input position
|
|
1590
1598
|
*/
|
|
@@ -7935,9 +7943,18 @@ class RE2 {
|
|
|
7935
7943
|
}
|
|
7936
7944
|
|
|
7937
7945
|
class RE2Set {
|
|
7946
|
+
/** @type {number} */
|
|
7938
7947
|
static UNANCHORED = RE2Flags.UNANCHORED;
|
|
7948
|
+
/** @type {number} */
|
|
7939
7949
|
static ANCHOR_START = RE2Flags.ANCHOR_START;
|
|
7950
|
+
/** @type {number} */
|
|
7940
7951
|
static ANCHOR_BOTH = RE2Flags.ANCHOR_BOTH;
|
|
7952
|
+
|
|
7953
|
+
/**
|
|
7954
|
+
* Constructs a new RE2Set with the specified anchor mode and flags.
|
|
7955
|
+
* @param {number} [anchor=RE2Set.UNANCHORED] - The anchoring mode (e.g., RE2Set.UNANCHORED).
|
|
7956
|
+
* @param {number} [flags=0] - The public flags to apply to all patterns in the set.
|
|
7957
|
+
*/
|
|
7941
7958
|
constructor(anchor = RE2Set.UNANCHORED, flags = 0) {
|
|
7942
7959
|
this.anchor = anchor;
|
|
7943
7960
|
this.jsFlags = flags;
|
|
@@ -7954,6 +7971,14 @@ class RE2Set {
|
|
|
7954
7971
|
this.dfa = null;
|
|
7955
7972
|
this.dummyRe2 = null;
|
|
7956
7973
|
}
|
|
7974
|
+
|
|
7975
|
+
/**
|
|
7976
|
+
* Adds a new regular expression pattern to the set.
|
|
7977
|
+
* Patterns cannot be added after the set has been compiled.
|
|
7978
|
+
* @param {string} pattern - The regular expression pattern to add.
|
|
7979
|
+
* @returns {number} The integer index assigned to the added pattern.
|
|
7980
|
+
* @throws {RE2JSCompileException} If patterns are added after compilation.
|
|
7981
|
+
*/
|
|
7957
7982
|
add(pattern) {
|
|
7958
7983
|
if (this.prog) {
|
|
7959
7984
|
throw new RE2JSCompileException('Cannot add patterns after compile');
|
|
@@ -7972,6 +7997,12 @@ class RE2Set {
|
|
|
7972
7997
|
this.regexps.push(Simplify.simplify(re));
|
|
7973
7998
|
return this.regexps.length - 1;
|
|
7974
7999
|
}
|
|
8000
|
+
|
|
8001
|
+
/**
|
|
8002
|
+
* Compiles the added patterns into a single state machine.
|
|
8003
|
+
* This is automatically called on the first match if not called explicitly.
|
|
8004
|
+
* @returns {void}
|
|
8005
|
+
*/
|
|
7975
8006
|
compile() {
|
|
7976
8007
|
if (this.prog) return;
|
|
7977
8008
|
this.prog = Compiler.compileSet(this.regexps);
|
|
@@ -7984,6 +8015,12 @@ class RE2Set {
|
|
|
7984
8015
|
longest: false
|
|
7985
8016
|
};
|
|
7986
8017
|
}
|
|
8018
|
+
|
|
8019
|
+
/**
|
|
8020
|
+
* Matches the input against the compiled set of regular expressions.
|
|
8021
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to match against.
|
|
8022
|
+
* @returns {number[]} An array of indices representing the patterns that successfully matched the input.
|
|
8023
|
+
*/
|
|
7987
8024
|
match(input) {
|
|
7988
8025
|
if (!this.prog) this.compile();
|
|
7989
8026
|
const machineInput = Utils.isByteArray(input) ? MachineInput.fromUTF8(input) : MachineInput.fromUTF16(input);
|
|
@@ -8009,13 +8046,19 @@ class RE2Set {
|
|
|
8009
8046
|
* Transform JS regex string to RE2 regex string
|
|
8010
8047
|
*/
|
|
8011
8048
|
class TranslateRegExpString {
|
|
8012
|
-
static isUpperCaseAlpha(ch) {
|
|
8013
|
-
return 'A' <= ch && ch <= 'Z';
|
|
8014
|
-
}
|
|
8015
8049
|
static isHexadecimal(ch) {
|
|
8016
8050
|
return '0' <= ch && ch <= '9' || 'A' <= ch && ch <= 'F' || 'a' <= ch && ch <= 'f';
|
|
8017
8051
|
}
|
|
8018
8052
|
static translate(data) {
|
|
8053
|
+
let prefixFlags = '';
|
|
8054
|
+
if (data instanceof RegExp) {
|
|
8055
|
+
if (data.ignoreCase) prefixFlags += 'i';
|
|
8056
|
+
if (data.multiline) prefixFlags += 'm';
|
|
8057
|
+
if (data.dotAll) prefixFlags += 's';
|
|
8058
|
+
|
|
8059
|
+
// execution flags ('g', 'y') are safely ignored here.
|
|
8060
|
+
data = data.source;
|
|
8061
|
+
}
|
|
8019
8062
|
if (typeof data !== 'string') {
|
|
8020
8063
|
return data;
|
|
8021
8064
|
}
|
|
@@ -8026,6 +8069,7 @@ class TranslateRegExpString {
|
|
|
8026
8069
|
result = '(?:)';
|
|
8027
8070
|
changed = true;
|
|
8028
8071
|
}
|
|
8072
|
+
let inCharClass = false;
|
|
8029
8073
|
let i = 0;
|
|
8030
8074
|
while (i < size) {
|
|
8031
8075
|
let ch = data[i];
|
|
@@ -8064,10 +8108,28 @@ class TranslateRegExpString {
|
|
|
8064
8108
|
if (i + 2 < size) {
|
|
8065
8109
|
let nextCh = data[i + 2];
|
|
8066
8110
|
if (nextCh === '{') {
|
|
8067
|
-
|
|
8068
|
-
i
|
|
8069
|
-
|
|
8070
|
-
|
|
8111
|
+
// Must have a closing brace and at least one valid hex digit inside
|
|
8112
|
+
let j = i + 3;
|
|
8113
|
+
let hasHex = false;
|
|
8114
|
+
let closed = false;
|
|
8115
|
+
while (j < size) {
|
|
8116
|
+
const hexChar = data[j];
|
|
8117
|
+
if (hexChar === '}') {
|
|
8118
|
+
closed = true;
|
|
8119
|
+
break;
|
|
8120
|
+
}
|
|
8121
|
+
if (!TranslateRegExpString.isHexadecimal(hexChar)) {
|
|
8122
|
+
break;
|
|
8123
|
+
}
|
|
8124
|
+
hasHex = true;
|
|
8125
|
+
j++;
|
|
8126
|
+
}
|
|
8127
|
+
if (closed && hasHex) {
|
|
8128
|
+
result += '\\x';
|
|
8129
|
+
i += 2;
|
|
8130
|
+
changed = true;
|
|
8131
|
+
continue;
|
|
8132
|
+
}
|
|
8071
8133
|
} else if (i + 5 < size) {
|
|
8072
8134
|
let isHex4 = true;
|
|
8073
8135
|
for (let j = 0; j < 4; j++) {
|
|
@@ -8084,18 +8146,101 @@ class TranslateRegExpString {
|
|
|
8084
8146
|
}
|
|
8085
8147
|
}
|
|
8086
8148
|
}
|
|
8149
|
+
|
|
8150
|
+
// Graceful degradation for invalid/unclosed \u sequences
|
|
8087
8151
|
result += 'u';
|
|
8088
8152
|
i += 2;
|
|
8089
8153
|
changed = true;
|
|
8090
8154
|
continue;
|
|
8091
8155
|
}
|
|
8156
|
+
case 'x':
|
|
8157
|
+
{
|
|
8158
|
+
let isValidHex = false;
|
|
8159
|
+
if (i + 2 < size && data[i + 2] === '{') {
|
|
8160
|
+
// Must have a closing brace and at least one valid hex digit inside
|
|
8161
|
+
let j = i + 3;
|
|
8162
|
+
let hasHex = false;
|
|
8163
|
+
let closed = false;
|
|
8164
|
+
while (j < size) {
|
|
8165
|
+
const hexChar = data[j];
|
|
8166
|
+
if (hexChar === '}') {
|
|
8167
|
+
closed = true;
|
|
8168
|
+
break;
|
|
8169
|
+
}
|
|
8170
|
+
if (!TranslateRegExpString.isHexadecimal(hexChar)) {
|
|
8171
|
+
break;
|
|
8172
|
+
}
|
|
8173
|
+
hasHex = true;
|
|
8174
|
+
j++;
|
|
8175
|
+
}
|
|
8176
|
+
if (closed && hasHex) {
|
|
8177
|
+
isValidHex = true;
|
|
8178
|
+
}
|
|
8179
|
+
} else if (i + 3 < size && TranslateRegExpString.isHexadecimal(data[i + 2]) && TranslateRegExpString.isHexadecimal(data[i + 3])) {
|
|
8180
|
+
isValidHex = true;
|
|
8181
|
+
}
|
|
8182
|
+
if (isValidHex) {
|
|
8183
|
+
result += '\\x';
|
|
8184
|
+
i += 2;
|
|
8185
|
+
} else {
|
|
8186
|
+
result += 'x';
|
|
8187
|
+
i += 2;
|
|
8188
|
+
changed = true;
|
|
8189
|
+
}
|
|
8190
|
+
continue;
|
|
8191
|
+
}
|
|
8192
|
+
// Whitelist of valid RE2/JS alphanumeric escapes
|
|
8193
|
+
case 'n':
|
|
8194
|
+
case 'r':
|
|
8195
|
+
case 't':
|
|
8196
|
+
case 'a':
|
|
8197
|
+
case 'f':
|
|
8198
|
+
case 'v':
|
|
8199
|
+
case 'd':
|
|
8200
|
+
case 'D':
|
|
8201
|
+
case 's':
|
|
8202
|
+
case 'S':
|
|
8203
|
+
case 'w':
|
|
8204
|
+
case 'W':
|
|
8205
|
+
case 'b':
|
|
8206
|
+
case 'B':
|
|
8207
|
+
case 'p':
|
|
8208
|
+
case 'P':
|
|
8209
|
+
case 'A':
|
|
8210
|
+
case 'z':
|
|
8211
|
+
case 'Q':
|
|
8212
|
+
case 'E':
|
|
8213
|
+
case '0':
|
|
8214
|
+
case '1':
|
|
8215
|
+
case '2':
|
|
8216
|
+
case '3':
|
|
8217
|
+
case '4':
|
|
8218
|
+
case '5':
|
|
8219
|
+
case '6':
|
|
8220
|
+
case '7':
|
|
8221
|
+
{
|
|
8222
|
+
result += '\\' + ch;
|
|
8223
|
+
i += 2;
|
|
8224
|
+
continue;
|
|
8225
|
+
}
|
|
8092
8226
|
default:
|
|
8093
8227
|
{
|
|
8094
|
-
result += '\\';
|
|
8095
8228
|
let cp = data.codePointAt(i + 1);
|
|
8096
|
-
let
|
|
8097
|
-
|
|
8098
|
-
|
|
8229
|
+
let isAlphaNum = cp >= 48 && cp <= 57 || cp >= 65 && cp <= 90 || cp >= 97 && cp <= 122;
|
|
8230
|
+
if (isAlphaNum) {
|
|
8231
|
+
// Invalid JS alphanumeric escape sequence (e.g. \8, \9, \e, \K)
|
|
8232
|
+
// Gracefully degrade to the literal character to prevent RE2 syntax crashes
|
|
8233
|
+
let symSize = Utils.charCount(cp);
|
|
8234
|
+
result += data.substring(i + 1, i + 1 + symSize);
|
|
8235
|
+
i += symSize + 1;
|
|
8236
|
+
changed = true;
|
|
8237
|
+
} else {
|
|
8238
|
+
// Escaped symbol (e.g. \., \*, \])
|
|
8239
|
+
result += '\\';
|
|
8240
|
+
let symSize = Utils.charCount(cp);
|
|
8241
|
+
result += data.substring(i + 1, i + 1 + symSize);
|
|
8242
|
+
i += symSize + 1;
|
|
8243
|
+
}
|
|
8099
8244
|
continue;
|
|
8100
8245
|
}
|
|
8101
8246
|
}
|
|
@@ -8105,7 +8250,13 @@ class TranslateRegExpString {
|
|
|
8105
8250
|
i += 1;
|
|
8106
8251
|
changed = true;
|
|
8107
8252
|
continue;
|
|
8108
|
-
} else if (ch === '
|
|
8253
|
+
} else if (ch === '[') {
|
|
8254
|
+
// Track entry into a character class (protects syntax inside)
|
|
8255
|
+
inCharClass = true;
|
|
8256
|
+
} else if (ch === ']') {
|
|
8257
|
+
// Track exit of a character class
|
|
8258
|
+
inCharClass = false;
|
|
8259
|
+
} else if (!inCharClass && ch === '(' && i + 2 < size && data[i + 1] === '?' && data[i + 2] === '<') {
|
|
8109
8260
|
if (i + 3 < size && !'=!>)'.includes(data[i + 3])) {
|
|
8110
8261
|
result += '(?P<';
|
|
8111
8262
|
i += 3;
|
|
@@ -8118,7 +8269,13 @@ class TranslateRegExpString {
|
|
|
8118
8269
|
result += data.substring(i, i + symSize);
|
|
8119
8270
|
i += symSize;
|
|
8120
8271
|
}
|
|
8121
|
-
|
|
8272
|
+
const finalResult = changed ? result : data;
|
|
8273
|
+
|
|
8274
|
+
// Append any extracted inline flags
|
|
8275
|
+
if (prefixFlags.length > 0) {
|
|
8276
|
+
return `(?${prefixFlags})${finalResult}`;
|
|
8277
|
+
}
|
|
8278
|
+
return finalResult;
|
|
8122
8279
|
}
|
|
8123
8280
|
}
|
|
8124
8281
|
|
|
@@ -8196,7 +8353,7 @@ class RE2JS {
|
|
|
8196
8353
|
* RE2JS-compatible syntax, and handling Unicode sequences properly. It ensures that the
|
|
8197
8354
|
* resulting regex is safe and properly formatted before compilation.
|
|
8198
8355
|
*
|
|
8199
|
-
* @param {string} expr - The regular expression string to be translated.
|
|
8356
|
+
* @param {string|RegExp} expr - The regular expression string to be translated.
|
|
8200
8357
|
* @returns {string} - The transformed regular expression string, ready for compilation.
|
|
8201
8358
|
*/
|
|
8202
8359
|
static translateRegExp(expr) {
|
|
@@ -8240,7 +8397,7 @@ class RE2JS {
|
|
|
8240
8397
|
* Matches a string against a regular expression.
|
|
8241
8398
|
*
|
|
8242
8399
|
* @param {string} regex the regular expression
|
|
8243
|
-
* @param {string|number[]} input the input
|
|
8400
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
8244
8401
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
8245
8402
|
* @throws RE2JSSyntaxException if the regular expression is malformed
|
|
8246
8403
|
*/
|
|
@@ -8307,7 +8464,7 @@ class RE2JS {
|
|
|
8307
8464
|
/**
|
|
8308
8465
|
* Matches a string against a regular expression.
|
|
8309
8466
|
*
|
|
8310
|
-
* @param {string|number[]} input the input
|
|
8467
|
+
* @param {string|number[]|Uint8Array} input the input
|
|
8311
8468
|
* @returns {boolean} true if the regular expression matches the entire input
|
|
8312
8469
|
*/
|
|
8313
8470
|
matches(input) {
|
|
@@ -8317,7 +8474,7 @@ class RE2JS {
|
|
|
8317
8474
|
/**
|
|
8318
8475
|
* Creates a new {@code Matcher} matching the pattern against the input.
|
|
8319
8476
|
*
|
|
8320
|
-
* @param {string|number[]} input the input string
|
|
8477
|
+
* @param {string|number[]|Uint8Array} input the input string
|
|
8321
8478
|
* @returns {Matcher}
|
|
8322
8479
|
*/
|
|
8323
8480
|
matcher(input) {
|
|
@@ -8333,7 +8490,7 @@ class RE2JS {
|
|
|
8333
8490
|
* a boolean and does not extract capture groups, it bypasses the `Matcher` overhead
|
|
8334
8491
|
* and guarantees execution on the high-speed DFA engine whenever possible.
|
|
8335
8492
|
*
|
|
8336
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
8493
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
8337
8494
|
* @returns {boolean} `true` if the pattern is found anywhere in the input, `false` otherwise.
|
|
8338
8495
|
*/
|
|
8339
8496
|
test(input) {
|
|
@@ -8352,7 +8509,7 @@ class RE2JS {
|
|
|
8352
8509
|
* faster because it does not request capture group data. By requesting 0 capture groups,
|
|
8353
8510
|
* it securely routes execution through the DFA fast-path.
|
|
8354
8511
|
*
|
|
8355
|
-
* @param {string|number[]} input - The input string or UTF-8 byte array to test against.
|
|
8512
|
+
* @param {string|number[]|Uint8Array} input - The input string or UTF-8 byte array to test against.
|
|
8356
8513
|
* @returns {boolean} `true` if the exact input string fully matches the pattern, `false` otherwise.
|
|
8357
8514
|
*/
|
|
8358
8515
|
testExact(input) {
|