@ptolemy2002/rgx 7.4.0 → 7.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -88,10 +88,12 @@ type RGXCapture<T = unknown> = {
88
88
  start: number;
89
89
  end: number;
90
90
  ownerId: string | null;
91
+ branch: number;
91
92
  };
92
93
 
93
94
  type RGXPartOptions<R, T=string> = {
94
95
  id: string;
96
+ rawTransform: (captured: string) => string;
95
97
  transform: (captured: string) => T;
96
98
  validate: (captured: RGXCapture<T>, part: RGXPart<R, T>, walker: RGXWalker<R>) => boolean | string;
97
99
  beforeCapture: ((part: RGXPart<R, T>, walker: RGXWalker<R>) => RGXPartControl) | null;
@@ -622,6 +624,7 @@ constructor(token: RGXToken, options?: Partial<RGXPartOptions<R, T>>)
622
624
  - `token` (`RGXToken`): The token to wrap.
623
625
  - `options` (`Partial<RGXPartOptions<R, T>>`, optional): Configuration options. Defaults to `{}`.
624
626
  - `id` (`string`, optional): An optional identifier for this part. Defaults to `null`, but must be a string if provided.
627
+ - `rawTransform` (`(captured: string) => string`, optional): A function that transforms the raw captured string before it is stored as `raw` on the capture result and before `transform` is applied. Defaults to an identity function.
625
628
  - `transform` (`(captured: string) => T`, optional): A function that transforms the captured string into the desired type `T`. Defaults to an identity function that casts the string to `T`.
626
629
  - `beforeCapture` (`((part: RGXPart<R, T>, walker: RGXWalker<R>) => RGXPartControl) | null`, optional): A callback invoked before capturing this part during walking. Returns an `RGXPartControl` value to control walker behavior: `"skip"` to skip this token without capturing, `"silent"` to capture but not record in `captures`, `"stop"` to halt immediately without capturing or advancing, or `void`/`undefined` to proceed normally. Defaults to `null`.
627
630
  - `afterCapture` (`((capture: RGXCapture<T>, part: RGXPart<R, T>, walker: RGXWalker<R>) => void) | null`, optional): A callback invoked after capturing this part during walking. Receives the typed `RGXCapture<T>` result. Can call `walker.stop()` to halt walking after this capture. Defaults to `null`.
@@ -630,6 +633,7 @@ constructor(token: RGXToken, options?: Partial<RGXPartOptions<R, T>>)
630
633
  #### Properties
631
634
  - `id` (`string | null`): An optional identifier for this part.
632
635
  - `token` (`RGXToken`): The wrapped token.
636
+ - `rawTransform` (`(captured: string) => string`, readonly): The raw transform function applied to the matched string before it is stored as `raw` and before `transform` is called.
633
637
  - `transform` (`(captured: string) => T`, readonly): The transform function used to convert captured strings to values of type `T`.
634
638
  - `beforeCapture` (`((part: RGXPart<R, T>, walker: RGXWalker<R>) => RGXPartControl) | null`, readonly): The before-capture callback, or `null`.
635
639
  - `afterCapture` (`((capture: RGXCapture<T>, part: RGXPart<R, T>, walker: RGXWalker<R>) => void) | null`, readonly): The after-capture callback, or `null`.
@@ -638,7 +642,7 @@ constructor(token: RGXToken, options?: Partial<RGXPartOptions<R, T>>)
638
642
 
639
643
  #### Methods
640
644
  - `toRgx() => RGXToken`: Returns the wrapped token.
641
- - `clone(depth: CloneDepth = "max") => RGXPart`: Creates a clone of this part. When `depth` is `0`, returns `this`; otherwise, returns a new `RGXPart` with a cloned token and the same `transform`, `beforeCapture`, and `afterCapture` references.
645
+ - `clone(depth: CloneDepth = "max") => RGXPart`: Creates a clone of this part. When `depth` is `0`, returns `this`; otherwise, returns a new `RGXPart` with a cloned token and the same `rawTransform`, `transform`, `beforeCapture`, and `afterCapture` references.
642
646
  - `hasId() => this is RGXPart<R, T> & { id: string }`: A type guard that checks if this part has a non-null `id`. If `true`, narrows the type to indicate that `id` is a string.
643
647
  - `validate(capture: RGXCapture<T>, walker: RGXWalker<R>) => void`: Runs the `validate` function supplied in this part's options, if one was provided. If that function returns `false`, a generic `RGXPartValidationFailedError` is thrown. If it returns a string, an `RGXPartValidationFailedError` is thrown with that string as the message. If it returns `true`, validation passes and nothing is thrown. This is called internally by the walker after capturing and transforming a part, before invoking `afterCapture`.
644
648
 
@@ -669,7 +673,7 @@ constructor(source: string, tokens: RGXTokenCollectionInput, options?: RGXWalker
669
673
  - `tokens` (`RGXTokenCollection`): The internal collection of tokens in 'concat' mode (readonly).
670
674
  - `tokenPosition` (`number`): The current index in the token collection. Setting this validates that the value is >= 0 and <= `tokens.length`, throwing `RGXOutOfBoundsError` if not.
671
675
  - `reduced` (`R`): A user-defined accumulator value, typically updated by `RGXPart` callbacks during walking.
672
- - `captures` (`RGXCapture[]`): An array of structured capture results recorded during walking. Each entry has a `raw` string, a `value` (the transform result for Parts, or the raw string for plain tokens), `start` and `end` indices in the source string, and an `ownerId` that is the `id` of the Part that produced it (or `null` for captures from plain tokens or parts without ids).
676
+ - `captures` (`RGXCapture[]`): An array of structured capture results recorded during walking. Each entry has a `raw` string (the `rawTransform` result for Parts, or the matched string for plain tokens), a `value` (the `transform` result for Parts, or the matched string for plain tokens), `start` and `end` indices in the source string, an `ownerId` that is the `id` of the Part that produced it (or `null` for captures from plain tokens or parts without ids), and a `branch` index indicating which alternative of a multi-branch Part token was matched (or `0` if there is only one branch or the token is not a Part).
673
677
  - `namedCaptures` (`Record<string, RGXCapture[]>`): An object mapping capture IDs to their corresponding `RGXCapture` results. Only Parts with non-null IDs are included. The captures occur in the same order as they appear in the `captures` array.
674
678
  - `infinite` (`boolean`): Whether the walker is in infinite mode — stays at the last token when the token collection is exhausted until the source is consumed.
675
679
  - `looping` (`boolean`): Whether the walker is in looping mode — loops back to token position `0` when the token collection is exhausted until the source is consumed.
@@ -684,7 +688,8 @@ constructor(source: string, tokens: RGXTokenCollectionInput, options?: RGXWalker
684
688
  - `lastCapture() => RGXCapture | null`: Returns the last entry in `captures`, or `null` if empty.
685
689
  - `currentToken() => RGXToken | null`: Returns the token at the current token position, or `null` if at the end.
686
690
  - `remainingSource() => string | null`: Returns the remaining source string from the current position onward, or `null` if the source is fully consumed.
687
- - `capture(token: RGXToken) => string`: Resolves the token to a regex, asserts that it matches at the current source position (throwing `RGXRegexNotMatchedAtPositionError` if not), and advances the source position by the match length. Returns the matched string.
691
+ - `capture(token: RGXToken, includeMatch?: false) => string`: Resolves the token to a regex, asserts that it matches at the current source position (throwing `RGXRegexNotMatchedAtPositionError` if not), and advances the source position by the match length. Returns the matched string.
692
+ - `capture(token: RGXToken, includeMatch: true) => RegExpExecArray`: Same as above, but returns the full `RegExpExecArray` from the match instead of just the matched string.
688
693
  - `step() => RGXCapture | null`: Steps through the next token in the collection. If the token is an `RGXPart`, calls `beforeCapture` first — if it returns `"stop"`, sets `stopped` and returns `null` without advancing; if `"skip"`, advances the token position and returns `null` without capturing; if `"silent"`, captures but does not add to `captures` or `namedCaptures`. After capturing, validates. After validating, calls `afterCapture` if present. Returns the `RGXCapture` result, or `null` if there are no more tokens (or no more source in `infinite`/`looping` mode), the step was skipped, or the walker was stopped.
689
694
  - `stepToToken(predicate: (token: RGXToken) => boolean) => void`: Steps through tokens until the predicate returns `true` for the current token or the walker is stopped. The matching token is not consumed.
690
695
  - `stepToPart(predicate?: (part: RGXPart<R>) => boolean) => void`: Steps through tokens until the next `RGXPart` satisfying the predicate is reached. If already at a Part, steps once first to move past it. The matching Part is not consumed.
@@ -1480,49 +1485,58 @@ Creates a new `ExtRegExp` from an existing one with additional or replaced flags
1480
1485
 
1481
1486
  ### regexMatchAtPosition
1482
1487
  ```typescript
1483
- function regexMatchAtPosition(regex: RegExp, str: string, position: number): string | null
1488
+ function regexMatchAtPosition(regex: RegExp, str: string, position: number, includeMatch: true): RegExpExecArray | null
1489
+ function regexMatchAtPosition(regex: RegExp, str: string, position: number, includeMatch?: false): string | null
1484
1490
  ```
1485
1491
 
1486
- Attempts to match the given regular expression at a specific position in the string and returns the matched string, or `null` if there is no match. This is done by creating a sticky (`y` flag) copy of the regex and setting its `lastIndex` to the desired position. The position must be within the bounds of the string (>= 0 and < string length), or an `RGXOutOfBoundsError` will be thrown.
1492
+ Attempts to match the given regular expression at a specific position in the string. This is done by creating a sticky (`y` flag) copy of the regex and setting its `lastIndex` to the desired position. The position must be within the bounds of the string (>= 0 and < string length), or an `RGXOutOfBoundsError` will be thrown.
1487
1493
 
1488
1494
  #### Parameters
1489
1495
  - `regex` (`RegExp`): The regular expression to match.
1490
1496
  - `str` (`string`): The string to match against.
1491
1497
  - `position` (`number`): The zero-based index in the string at which to attempt the match. Must be >= 0 and < `str.length`.
1498
+ - `includeMatch` (`boolean`, optional): When `true`, returns the full `RegExpExecArray` instead of just the matched string. Defaults to `false`.
1492
1499
 
1493
1500
  #### Returns
1494
- - `string | null`: The matched string if the regex matches at the specified position, otherwise `null`.
1501
+ - `string | null`: When `includeMatch` is `false` (default): the matched string if the regex matches at the specified position, otherwise `null`.
1502
+ - `RegExpExecArray | null`: When `includeMatch` is `true`: the full match array if the regex matches, otherwise `null`.
1495
1503
 
1496
1504
  ### doesRegexMatchAtPosition
1497
1505
  ```typescript
1498
- function doesRegexMatchAtPosition(regex: RegExp, str: string, position: number): boolean
1506
+ function doesRegexMatchAtPosition(regex: RegExp, str: string, position: number, includeMatch: true): RegExpExecArray | false
1507
+ function doesRegexMatchAtPosition(regex: RegExp, str: string, position: number, includeMatch?: false): boolean
1499
1508
  ```
1500
1509
 
1501
- Tests whether the given regular expression matches at a specific position in the string. This is a boolean wrapper around `regexMatchAtPosition`, returning `true` if the match is non-null.
1510
+ Tests whether the given regular expression matches at a specific position in the string.
1502
1511
 
1503
1512
  #### Parameters
1504
1513
  - `regex` (`RegExp`): The regular expression to test.
1505
1514
  - `str` (`string`): The string to test against.
1506
1515
  - `position` (`number`): The zero-based index in the string at which to test the match. Must be >= 0 and < `str.length`.
1516
+ - `includeMatch` (`boolean`, optional): When `true`, returns the full `RegExpExecArray` on a match instead of `true`. Defaults to `false`.
1507
1517
 
1508
1518
  #### Returns
1509
- - `boolean`: `true` if the regex matches at the specified position, otherwise `false`.
1519
+ - `boolean`: When `includeMatch` is `false` (default): `true` if the regex matches at the specified position, otherwise `false`.
1520
+ - `RegExpExecArray | false`: When `includeMatch` is `true`: the full match array if the regex matches, otherwise `false`.
1510
1521
 
1511
1522
  ### assertRegexMatchesAtPosition
1512
1523
  ```typescript
1513
- function assertRegexMatchesAtPosition(regex: RegExp, str: string, position: number, contextSize?: number | null): string
1524
+ function assertRegexMatchesAtPosition(regex: RegExp, str: string, position: number, contextSize?: number | null, includeMatch?: false): string
1525
+ function assertRegexMatchesAtPosition(regex: RegExp, str: string, position: number, contextSize: number | null | undefined, includeMatch: true): RegExpExecArray
1514
1526
  ```
1515
1527
 
1516
- Asserts that the given regular expression matches at a specific position in the string, throwing an `RGXRegexNotMatchedAtPositionError` if it does not. On success, returns the matched string.
1528
+ Asserts that the given regular expression matches at a specific position in the string, throwing an `RGXRegexNotMatchedAtPositionError` if it does not. On success, returns the matched string or full match array depending on `includeMatch`.
1517
1529
 
1518
1530
  #### Parameters
1519
1531
  - `regex` (`RegExp`): The regular expression to match.
1520
1532
  - `str` (`string`): The string to match against.
1521
1533
  - `position` (`number`): The zero-based index in the string at which to assert the match. Must be >= 0 and < `str.length`.
1522
1534
  - `contextSize` (`number | null`, optional): The number of characters on each side of the position to include in the error's context output. Defaults to `10`.
1535
+ - `includeMatch` (`boolean`, optional): When `true`, returns the full `RegExpExecArray` instead of just the matched string. Defaults to `false`.
1523
1536
 
1524
1537
  #### Returns
1525
- - `string`: The matched string if the regex matches at the specified position. Throws `RGXRegexNotMatchedAtPositionError` if there is no match.
1538
+ - `string`: When `includeMatch` is `false` (default): the matched string. Throws `RGXRegexNotMatchedAtPositionError` if there is no match.
1539
+ - `RegExpExecArray`: When `includeMatch` is `true`: the full match array. Throws `RGXRegexNotMatchedAtPositionError` if there is no match.
1526
1540
 
1527
1541
  ### cloneRGXToken
1528
1542
  ```typescript
@@ -1,3 +1,6 @@
1
- export declare function regexMatchAtPosition(regex: RegExp, str: string, position: number): string | null;
2
- export declare function doesRegexMatchAtPosition(regex: RegExp, str: string, position: number): boolean;
3
- export declare function assertRegexMatchesAtPosition(regex: RegExp, str: string, position: number, contextSize?: number | null): string;
1
+ export declare function regexMatchAtPosition(regex: RegExp, str: string, position: number, includeMatch: true): RegExpExecArray | null;
2
+ export declare function regexMatchAtPosition(regex: RegExp, str: string, position: number, includeMatch?: false): string | null;
3
+ export declare function doesRegexMatchAtPosition(regex: RegExp, str: string, position: number, includeMatch: true): RegExpExecArray | false;
4
+ export declare function doesRegexMatchAtPosition(regex: RegExp, str: string, position: number, includeMatch?: false): boolean;
5
+ export declare function assertRegexMatchesAtPosition(regex: RegExp, str: string, position: number, contextSize?: number | null, includeMatch?: false): string;
6
+ export declare function assertRegexMatchesAtPosition(regex: RegExp, str: string, position: number, contextSize: number | null | undefined, includeMatch: true): RegExpExecArray;
@@ -6,7 +6,7 @@ exports.assertRegexMatchesAtPosition = assertRegexMatchesAtPosition;
6
6
  const outOfBounds_1 = require("../errors/outOfBounds");
7
7
  const regexWithFlags_1 = require("./regexWithFlags");
8
8
  const errors_1 = require("../errors");
9
- function regexMatchAtPosition(regex, str, position) {
9
+ function regexMatchAtPosition(regex, str, position, includeMatch = false) {
10
10
  /*
11
11
  The y flag means sticky mode, which means the next match must start at
12
12
  lastIndex. By setting lastIndex to the position we want to check, we can test
@@ -16,15 +16,18 @@ function regexMatchAtPosition(regex, str, position) {
16
16
  const stickyRegex = (0, regexWithFlags_1.regexWithFlags)(regex, "y");
17
17
  stickyRegex.lastIndex = position;
18
18
  const match = stickyRegex.exec(str);
19
- return match ? match[0] : null;
19
+ return includeMatch ? match : (match ? match[0] : null);
20
20
  }
21
- function doesRegexMatchAtPosition(regex, str, position) {
22
- return regexMatchAtPosition(regex, str, position) !== null;
21
+ function doesRegexMatchAtPosition(regex, str, position, includeMatch = false) {
22
+ const match = regexMatchAtPosition(regex, str, position, true);
23
+ if (includeMatch)
24
+ return match ?? false;
25
+ return match !== null;
23
26
  }
24
- function assertRegexMatchesAtPosition(regex, str, position, contextSize = 10) {
25
- const result = regexMatchAtPosition(regex, str, position);
27
+ function assertRegexMatchesAtPosition(regex, str, position, contextSize = 10, includeMatch = false) {
28
+ const result = regexMatchAtPosition(regex, str, position, true);
26
29
  if (result === null) {
27
30
  throw new errors_1.RGXRegexNotMatchedAtPositionError("Regex not matched at index", regex, str, position, contextSize);
28
31
  }
29
- return result;
32
+ return includeMatch ? result : result[0];
30
33
  }
@@ -35,7 +35,8 @@ export declare class RGXWalker<R> implements RGXConvertibleToken {
35
35
  lastCapture(): RGXCapture | null;
36
36
  currentToken(): RGXToken;
37
37
  remainingSource(): string | null;
38
- capture(token: RGXToken): string;
38
+ capture(token: RGXToken, includeMatch: true): RegExpExecArray;
39
+ capture(token: RGXToken, includeMatch?: false): string;
39
40
  step(): RGXCapture | null;
40
41
  stepToToken(predicate: (token: RGXToken) => boolean): void;
41
42
  stepToPart(predicate?: (part: RGXPart<R>) => boolean): void;
@@ -8,6 +8,20 @@ const errors_1 = require("../errors");
8
8
  const part_1 = require("./part");
9
9
  const index_1 = require("../index");
10
10
  const internal_1 = require("../internal");
11
+ function createBranchGroups(tokens) {
12
+ if ((tokens instanceof collection_1.RGXTokenCollection && tokens.mode === "union") ||
13
+ index_1.RGXClassUnionToken.check(tokens))
14
+ return createBranchGroups(tokens.tokens);
15
+ if ((0, index_1.isRGXArrayToken)(tokens)) {
16
+ const newTokens = tokens.map((token, i) => {
17
+ return new index_1.RGXGroupToken({ name: `rgx_branch_${i}` }, [token]);
18
+ });
19
+ return new index_1.RGXClassUnionToken(newTokens);
20
+ }
21
+ else {
22
+ return tokens;
23
+ }
24
+ }
11
25
  class RGXWalker {
12
26
  get sourcePosition() {
13
27
  return this._sourcePosition;
@@ -68,11 +82,11 @@ class RGXWalker {
68
82
  return null;
69
83
  return this.source.slice(this.sourcePosition);
70
84
  }
71
- capture(token) {
85
+ capture(token, includeMatch = false) {
72
86
  const regex = (0, index_1.rgxa)([token]);
73
- const match = (0, index_1.assertRegexMatchesAtPosition)(regex, this.source, this.sourcePosition);
74
- this.sourcePosition += match.length;
75
- return match;
87
+ const match = (0, index_1.assertRegexMatchesAtPosition)(regex, this.source, this.sourcePosition, 10, true);
88
+ this.sourcePosition += match[0].length;
89
+ return includeMatch ? match : match[0];
76
90
  }
77
91
  step() {
78
92
  if (!this.infinite && !this.looping && this.atTokenEnd()) {
@@ -104,10 +118,26 @@ class RGXWalker {
104
118
  }
105
119
  // Capture the match
106
120
  const start = this.sourcePosition;
107
- const raw = this.capture(token);
121
+ let innerToken = token;
122
+ if (isPart)
123
+ innerToken = createBranchGroups(token.token);
124
+ const capture = this.capture(innerToken, true);
125
+ const raw = isPart ? token.rawTransform(capture[0]) : capture[0];
108
126
  const end = this.sourcePosition;
109
127
  const value = isPart ? token.transform(raw) : raw;
110
- const captureResult = { raw, value, start, end, ownerId: isPart && token.hasId() ? token.id : null };
128
+ let branch = 0;
129
+ if (isPart) {
130
+ // Determine branch index for captureResult by finding the first index
131
+ // with non-undefined match group.
132
+ for (let i = 0; i < capture.length; i++) {
133
+ const branchKey = `rgx_branch_${i}`;
134
+ if (capture.groups && capture.groups[branchKey] !== undefined) {
135
+ branch = i;
136
+ break;
137
+ }
138
+ }
139
+ }
140
+ const captureResult = { raw, value, start, end, branch, ownerId: isPart && token.hasId() ? token.id : null };
111
141
  // Validate the part. If validation fails, it will throw an error, so nothing below will run.
112
142
  if (isPart) {
113
143
  token.validate(captureResult, this);
@@ -8,9 +8,11 @@ export type RGXCapture<T = unknown> = {
8
8
  start: number;
9
9
  end: number;
10
10
  ownerId: string | null;
11
+ branch: number;
11
12
  };
12
13
  export type RGXPartOptions<R, T = string> = {
13
14
  id: string;
15
+ rawTransform: (captured: string) => string;
14
16
  transform: (captured: string) => T;
15
17
  validate: (captured: RGXCapture<T>, part: RGXPart<R, T>, walker: RGXWalker<R>) => boolean | string;
16
18
  beforeCapture: ((part: RGXPart<R, T>, walker: RGXWalker<R>) => RGXPartControl) | null;
@@ -19,6 +21,7 @@ export type RGXPartOptions<R, T = string> = {
19
21
  export declare class RGXPart<R, T = string> implements RGXConvertibleToken {
20
22
  id: string | null;
21
23
  token: RGXToken;
24
+ readonly rawTransform: RGXPartOptions<R, T>["rawTransform"];
22
25
  readonly transform: RGXPartOptions<R, T>["transform"];
23
26
  private readonly _validate;
24
27
  readonly beforeCapture: RGXPartOptions<R, T>["beforeCapture"];
@@ -11,6 +11,7 @@ class RGXPart {
11
11
  constructor(token, options = {}) {
12
12
  this.id = options.id ?? null;
13
13
  this.token = token;
14
+ this.rawTransform = options.rawTransform ?? (captured => captured);
14
15
  this.transform = options.transform ?? ((captured) => captured);
15
16
  this._validate = options.validate ?? (() => true);
16
17
  this.beforeCapture = options.beforeCapture ?? null;
@@ -45,6 +46,7 @@ class RGXPart {
45
46
  return this;
46
47
  return new RGXPart((0, clone_1.cloneRGXToken)(this.token, (0, immutability_utils_1.depthDecrement)(depth, 1)), {
47
48
  id: this.id ?? undefined,
49
+ rawTransform: this.rawTransform,
48
50
  transform: this.transform,
49
51
  beforeCapture: this.beforeCapture,
50
52
  afterCapture: this.afterCapture,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ptolemy2002/rgx",
3
- "version": "7.4.0",
3
+ "version": "7.5.0",
4
4
  "private": false,
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",