@bcts/envelope-pattern 1.0.0-alpha.16 → 1.0.0-alpha.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.cjs +1992 -1714
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +147 -31
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +147 -31
- package/dist/index.d.mts.map +1 -1
- package/dist/index.iife.js +1984 -1707
- package/dist/index.iife.js.map +1 -1
- package/dist/index.mjs +1966 -1698
- package/dist/index.mjs.map +1 -1
- package/package.json +9 -9
- package/src/format.ts +32 -13
- package/src/parse/index.ts +138 -5
- package/src/parse/token.ts +59 -58
- package/src/pattern/index.ts +110 -2
- package/src/pattern/leaf/array-pattern.ts +26 -26
- package/src/pattern/leaf/bool-pattern.ts +12 -12
- package/src/pattern/leaf/byte-string-pattern.ts +15 -15
- package/src/pattern/leaf/cbor-pattern.ts +31 -31
- package/src/pattern/leaf/date-pattern.ts +9 -9
- package/src/pattern/leaf/index.ts +1 -2
- package/src/pattern/leaf/known-value-pattern.ts +21 -20
- package/src/pattern/leaf/map-pattern.ts +14 -14
- package/src/pattern/leaf/null-pattern.ts +8 -8
- package/src/pattern/leaf/number-pattern.ts +20 -20
- package/src/pattern/leaf/tagged-pattern.ts +20 -20
- package/src/pattern/leaf/text-pattern.ts +14 -14
- package/src/pattern/matcher.ts +88 -3
- package/src/pattern/meta/and-pattern.ts +19 -18
- package/src/pattern/meta/capture-pattern.ts +16 -17
- package/src/pattern/meta/group-pattern.ts +20 -17
- package/src/pattern/meta/not-pattern.ts +9 -8
- package/src/pattern/meta/or-pattern.ts +30 -25
- package/src/pattern/meta/search-pattern.ts +17 -17
- package/src/pattern/meta/traverse-pattern.ts +42 -18
- package/src/pattern/structure/assertions-pattern.ts +31 -32
- package/src/pattern/structure/digest-pattern.ts +23 -23
- package/src/pattern/structure/index.ts +1 -0
- package/src/pattern/structure/node-pattern.ts +17 -17
- package/src/pattern/structure/object-pattern.ts +14 -15
- package/src/pattern/structure/obscured-pattern.ts +7 -7
- package/src/pattern/structure/predicate-pattern.ts +14 -15
- package/src/pattern/structure/subject-pattern.ts +16 -17
- package/src/pattern/structure/wrapped-pattern.ts +40 -19
- package/src/pattern/vm.ts +12 -11
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bcts/envelope-pattern",
|
|
3
|
-
"version": "1.0.0-alpha.16",
|
|
3
|
+
"version": "1.0.0-alpha.18",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Pattern matching for Gordian Envelope structures",
|
|
6
6
|
"license": "BSD-2-Clause-Patent",
|
|
@@ -60,16 +60,16 @@
|
|
|
60
60
|
"@bcts/eslint": "^0.1.0",
|
|
61
61
|
"@bcts/tsconfig": "^0.1.0",
|
|
62
62
|
"eslint": "^9.39.2",
|
|
63
|
-
"tsdown": "^0.
|
|
64
|
-
"typedoc": "^0.28.
|
|
63
|
+
"tsdown": "^0.20.1",
|
|
64
|
+
"typedoc": "^0.28.16",
|
|
65
65
|
"typescript": "^5.9.3",
|
|
66
|
-
"vitest": "^4.0.
|
|
66
|
+
"vitest": "^4.0.18"
|
|
67
67
|
},
|
|
68
68
|
"dependencies": {
|
|
69
|
-
"@bcts/dcbor": "^1.0.0-alpha.
|
|
70
|
-
"@bcts/dcbor-parse": "^1.0.0-alpha.
|
|
71
|
-
"@bcts/dcbor-pattern": "^1.0.0-alpha.
|
|
72
|
-
"@bcts/envelope": "^1.0.0-alpha.
|
|
73
|
-
"@bcts/known-values": "^1.0.0-alpha.
|
|
69
|
+
"@bcts/dcbor": "^1.0.0-alpha.18",
|
|
70
|
+
"@bcts/dcbor-parse": "^1.0.0-alpha.18",
|
|
71
|
+
"@bcts/dcbor-pattern": "^1.0.0-alpha.18",
|
|
72
|
+
"@bcts/envelope": "^1.0.0-alpha.18",
|
|
73
|
+
"@bcts/known-values": "^1.0.0-alpha.18"
|
|
74
74
|
}
|
|
75
75
|
}
|
package/src/format.ts
CHANGED
|
@@ -97,15 +97,15 @@ export function defaultFormatPathsOpts(): FormatPathsOpts {
|
|
|
97
97
|
* Builder for FormatPathsOpts.
|
|
98
98
|
*/
|
|
99
99
|
export class FormatPathsOptsBuilder {
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
100
|
+
private _indent = true;
|
|
101
|
+
private _elementFormat: PathElementFormat = defaultPathElementFormat();
|
|
102
|
+
private _lastElementOnly = false;
|
|
103
103
|
|
|
104
104
|
/**
|
|
105
105
|
* Sets whether to indent each path element.
|
|
106
106
|
*/
|
|
107
107
|
indent(indent: boolean): this {
|
|
108
|
-
this
|
|
108
|
+
this._indent = indent;
|
|
109
109
|
return this;
|
|
110
110
|
}
|
|
111
111
|
|
|
@@ -113,7 +113,7 @@ export class FormatPathsOptsBuilder {
|
|
|
113
113
|
* Sets the format for each path element.
|
|
114
114
|
*/
|
|
115
115
|
elementFormat(format: PathElementFormat): this {
|
|
116
|
-
this
|
|
116
|
+
this._elementFormat = format;
|
|
117
117
|
return this;
|
|
118
118
|
}
|
|
119
119
|
|
|
@@ -121,7 +121,7 @@ export class FormatPathsOptsBuilder {
|
|
|
121
121
|
* Sets whether to format only the last element of each path.
|
|
122
122
|
*/
|
|
123
123
|
lastElementOnly(lastElementOnly: boolean): this {
|
|
124
|
-
this
|
|
124
|
+
this._lastElementOnly = lastElementOnly;
|
|
125
125
|
return this;
|
|
126
126
|
}
|
|
127
127
|
|
|
@@ -130,9 +130,9 @@ export class FormatPathsOptsBuilder {
|
|
|
130
130
|
*/
|
|
131
131
|
build(): FormatPathsOpts {
|
|
132
132
|
return {
|
|
133
|
-
indent: this
|
|
134
|
-
elementFormat: this
|
|
135
|
-
lastElementOnly: this
|
|
133
|
+
indent: this._indent,
|
|
134
|
+
elementFormat: this._elementFormat,
|
|
135
|
+
lastElementOnly: this._lastElementOnly,
|
|
136
136
|
};
|
|
137
137
|
}
|
|
138
138
|
}
|
|
@@ -156,18 +156,37 @@ export function envelopeSummary(env: Envelope): string {
|
|
|
156
156
|
|
|
157
157
|
let summary: string;
|
|
158
158
|
switch (c.type) {
|
|
159
|
-
case "node":
|
|
160
|
-
|
|
159
|
+
case "node": {
|
|
160
|
+
const subjectSummary = env.subject().summary(Number.MAX_SAFE_INTEGER);
|
|
161
|
+
const assertions = env.assertions();
|
|
162
|
+
if (assertions.length > 0) {
|
|
163
|
+
const assertionSummaries = assertions.map((a) => {
|
|
164
|
+
const ac = a.case();
|
|
165
|
+
if (ac.type === "assertion") {
|
|
166
|
+
const pred = ac.assertion.predicate().summary(Number.MAX_SAFE_INTEGER);
|
|
167
|
+
const obj = ac.assertion.object().summary(Number.MAX_SAFE_INTEGER);
|
|
168
|
+
return `${pred}: ${obj}`;
|
|
169
|
+
}
|
|
170
|
+
return a.summary(Number.MAX_SAFE_INTEGER);
|
|
171
|
+
});
|
|
172
|
+
summary = `NODE ${subjectSummary} [ ${assertionSummaries.join(", ")} ]`;
|
|
173
|
+
} else {
|
|
174
|
+
summary = `NODE ${subjectSummary}`;
|
|
175
|
+
}
|
|
161
176
|
break;
|
|
177
|
+
}
|
|
162
178
|
case "leaf":
|
|
163
179
|
summary = `LEAF ${env.summary(Number.MAX_SAFE_INTEGER)}`;
|
|
164
180
|
break;
|
|
165
181
|
case "wrapped":
|
|
166
182
|
summary = `WRAPPED ${env.summary(Number.MAX_SAFE_INTEGER)}`;
|
|
167
183
|
break;
|
|
168
|
-
case "assertion":
|
|
169
|
-
|
|
184
|
+
case "assertion": {
|
|
185
|
+
const pred = c.assertion.predicate().summary(Number.MAX_SAFE_INTEGER);
|
|
186
|
+
const obj = c.assertion.object().summary(Number.MAX_SAFE_INTEGER);
|
|
187
|
+
summary = `ASSERTION ${pred}: ${obj}`;
|
|
170
188
|
break;
|
|
189
|
+
}
|
|
171
190
|
case "elided":
|
|
172
191
|
summary = "ELIDED";
|
|
173
192
|
break;
|
package/src/parse/index.ts
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
10
|
import { parse as parseDcborPattern } from "@bcts/dcbor-pattern";
|
|
11
|
+
import { parseDcborItemPartial } from "@bcts/dcbor-parse";
|
|
11
12
|
import { Lexer } from "./token";
|
|
12
13
|
import {
|
|
13
14
|
type Result,
|
|
@@ -37,10 +38,17 @@ import {
|
|
|
37
38
|
byteString,
|
|
38
39
|
anyDate,
|
|
39
40
|
date,
|
|
41
|
+
dateRange,
|
|
42
|
+
dateEarliest,
|
|
43
|
+
dateLatest,
|
|
44
|
+
dateRegex,
|
|
40
45
|
anyKnownValue,
|
|
41
46
|
knownValue,
|
|
42
47
|
anyArray,
|
|
43
48
|
anyTag,
|
|
49
|
+
anyCbor,
|
|
50
|
+
cborValue,
|
|
51
|
+
cborPattern,
|
|
44
52
|
nullPattern,
|
|
45
53
|
// Structure pattern constructors
|
|
46
54
|
leaf,
|
|
@@ -79,7 +87,6 @@ import {
|
|
|
79
87
|
ByteStringPattern,
|
|
80
88
|
KnownValuePattern,
|
|
81
89
|
ArrayPattern,
|
|
82
|
-
TaggedPattern,
|
|
83
90
|
DigestPattern,
|
|
84
91
|
NodePattern,
|
|
85
92
|
AssertionsPattern,
|
|
@@ -87,7 +94,6 @@ import {
|
|
|
87
94
|
leafByteString,
|
|
88
95
|
leafKnownValue,
|
|
89
96
|
leafArray,
|
|
90
|
-
leafTag,
|
|
91
97
|
structureDigest,
|
|
92
98
|
structureNode,
|
|
93
99
|
structureAssertions,
|
|
@@ -469,6 +475,7 @@ function parsePrimary(lexer: Lexer): Result<Pattern> {
|
|
|
469
475
|
case "Comma":
|
|
470
476
|
case "Ellipsis":
|
|
471
477
|
case "Range":
|
|
478
|
+
case "Identifier":
|
|
472
479
|
return err(unexpectedToken(token, span));
|
|
473
480
|
}
|
|
474
481
|
}
|
|
@@ -685,7 +692,50 @@ function parseTag(lexer: Lexer): Result<Pattern> {
|
|
|
685
692
|
* Parse date content from date'...' pattern.
|
|
686
693
|
*/
|
|
687
694
|
function parseDateContent(content: string, span: Span): Result<Pattern> {
|
|
688
|
-
//
|
|
695
|
+
// Check for regex syntax: /pattern/
|
|
696
|
+
if (content.startsWith("/") && content.endsWith("/")) {
|
|
697
|
+
const regexStr = content.slice(1, -1);
|
|
698
|
+
try {
|
|
699
|
+
return ok(dateRegex(new RegExp(regexStr)));
|
|
700
|
+
} catch {
|
|
701
|
+
return err(invalidRegex(span));
|
|
702
|
+
}
|
|
703
|
+
}
|
|
704
|
+
|
|
705
|
+
// Check for range syntax: date1...date2, date1..., ...date2
|
|
706
|
+
const rangeIdx = content.indexOf("...");
|
|
707
|
+
if (rangeIdx !== -1) {
|
|
708
|
+
const left = content.slice(0, rangeIdx).trim();
|
|
709
|
+
const right = content.slice(rangeIdx + 3).trim();
|
|
710
|
+
|
|
711
|
+
if (left.length === 0 && right.length > 0) {
|
|
712
|
+
// ...date2 → latest
|
|
713
|
+
const parsed = Date.parse(right);
|
|
714
|
+
if (isNaN(parsed)) return err({ type: "InvalidDateFormat", span });
|
|
715
|
+
return ok(dateLatest(CborDate.fromDatetime(new Date(parsed))));
|
|
716
|
+
}
|
|
717
|
+
if (left.length > 0 && right.length === 0) {
|
|
718
|
+
// date1... → earliest
|
|
719
|
+
const parsed = Date.parse(left);
|
|
720
|
+
if (isNaN(parsed)) return err({ type: "InvalidDateFormat", span });
|
|
721
|
+
return ok(dateEarliest(CborDate.fromDatetime(new Date(parsed))));
|
|
722
|
+
}
|
|
723
|
+
if (left.length > 0 && right.length > 0) {
|
|
724
|
+
// date1...date2 → range
|
|
725
|
+
const parsedStart = Date.parse(left);
|
|
726
|
+
const parsedEnd = Date.parse(right);
|
|
727
|
+
if (isNaN(parsedStart) || isNaN(parsedEnd)) return err({ type: "InvalidDateFormat", span });
|
|
728
|
+
return ok(
|
|
729
|
+
dateRange(
|
|
730
|
+
CborDate.fromDatetime(new Date(parsedStart)),
|
|
731
|
+
CborDate.fromDatetime(new Date(parsedEnd)),
|
|
732
|
+
),
|
|
733
|
+
);
|
|
734
|
+
}
|
|
735
|
+
return err({ type: "InvalidDateFormat", span });
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
// Simple exact date
|
|
689
739
|
const parsed = Date.parse(content);
|
|
690
740
|
if (isNaN(parsed)) {
|
|
691
741
|
return err({ type: "InvalidDateFormat", span });
|
|
@@ -713,17 +763,64 @@ function parseKnownValueContent(content: string): Result<Pattern> {
|
|
|
713
763
|
|
|
714
764
|
/**
|
|
715
765
|
* Parse CBOR pattern.
|
|
766
|
+
*
|
|
767
|
+
* Matches Rust parse_cbor: tries dcbor-pattern regex first (/keyword/),
|
|
768
|
+
* then CBOR diagnostic notation via parseDcborItemPartial, then falls
|
|
769
|
+
* back to parseOr for envelope pattern expressions.
|
|
716
770
|
*/
|
|
717
771
|
function parseCbor(lexer: Lexer): Result<Pattern> {
|
|
718
772
|
// Check for optional content in parentheses
|
|
719
773
|
const next = lexer.peekToken();
|
|
720
774
|
if (next?.token.type !== "ParenOpen") {
|
|
721
|
-
return ok(
|
|
775
|
+
return ok(anyCbor()); // cbor matches any CBOR value
|
|
722
776
|
}
|
|
723
777
|
|
|
724
778
|
lexer.next(); // consume (
|
|
725
779
|
|
|
726
|
-
//
|
|
780
|
+
// Check for dcbor-pattern regex syntax: cbor(/keyword/)
|
|
781
|
+
// Use peek() (character-level, non-destructive) instead of peekToken()
|
|
782
|
+
// to avoid the lexer advancing past the CBOR content.
|
|
783
|
+
if (lexer.peek() === "/") {
|
|
784
|
+
const regexTokenResult = lexer.next(); // tokenize /pattern/
|
|
785
|
+
if (regexTokenResult?.token.type === "Regex") {
|
|
786
|
+
const regexToken = regexTokenResult.token;
|
|
787
|
+
if (!regexToken.value.ok) return err(regexToken.value.error);
|
|
788
|
+
const keyword = regexToken.value.value;
|
|
789
|
+
|
|
790
|
+
// Parse the keyword as a dcbor-pattern expression
|
|
791
|
+
const dcborResult = parseDcborPattern(keyword);
|
|
792
|
+
if (!dcborResult.ok) {
|
|
793
|
+
return err(unexpectedToken(regexToken, regexTokenResult.span));
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
const close = lexer.next();
|
|
797
|
+
if (close?.token.type !== "ParenClose") {
|
|
798
|
+
return err({ type: "ExpectedCloseParen", span: lexer.span() });
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
return ok(cborPattern(dcborResult.value));
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
|
|
805
|
+
// Try to parse inner content as CBOR diagnostic notation
|
|
806
|
+
// (matching Rust utils::parse_cbor_inner which calls parse_dcbor_item_partial)
|
|
807
|
+
const remaining = lexer.remainder();
|
|
808
|
+
const cborResult = parseDcborItemPartial(remaining);
|
|
809
|
+
if (cborResult.ok) {
|
|
810
|
+
const [cborData, consumed] = cborResult.value;
|
|
811
|
+
lexer.bump(consumed);
|
|
812
|
+
// Skip whitespace before closing paren
|
|
813
|
+
while (lexer.peek() === " " || lexer.peek() === "\t" || lexer.peek() === "\n") {
|
|
814
|
+
lexer.bump(1);
|
|
815
|
+
}
|
|
816
|
+
const close = lexer.next();
|
|
817
|
+
if (close?.token.type !== "ParenClose") {
|
|
818
|
+
return err({ type: "ExpectedCloseParen", span: lexer.span() });
|
|
819
|
+
}
|
|
820
|
+
return ok(cborValue(cborData));
|
|
821
|
+
}
|
|
822
|
+
|
|
823
|
+
// Fallback: try parsing as a regular pattern expression
|
|
727
824
|
const inner = parseOr(lexer);
|
|
728
825
|
if (!inner.ok) return inner;
|
|
729
826
|
|
|
@@ -746,6 +843,24 @@ function parseNode(lexer: Lexer): Result<Pattern> {
|
|
|
746
843
|
}
|
|
747
844
|
|
|
748
845
|
lexer.next(); // consume (
|
|
846
|
+
|
|
847
|
+
// Check for assertion count range: node({n,m}), node({n}), node({n,})
|
|
848
|
+
const afterParen = lexer.peekToken();
|
|
849
|
+
if (afterParen?.token.type === "Range") {
|
|
850
|
+
lexer.next(); // consume Range token
|
|
851
|
+
const rangeToken = afterParen.token;
|
|
852
|
+
if (!rangeToken.value.ok) return err(rangeToken.value.error);
|
|
853
|
+
const quantifier = rangeToken.value.value;
|
|
854
|
+
const interval = quantifier.interval();
|
|
855
|
+
|
|
856
|
+
const close = lexer.next();
|
|
857
|
+
if (close?.token.type !== "ParenClose") {
|
|
858
|
+
return err({ type: "ExpectedCloseParen", span: lexer.span() });
|
|
859
|
+
}
|
|
860
|
+
|
|
861
|
+
return ok(patternStructure(structureNode(NodePattern.fromInterval(interval))));
|
|
862
|
+
}
|
|
863
|
+
|
|
749
864
|
const inner = parseOr(lexer);
|
|
750
865
|
if (!inner.ok) return inner;
|
|
751
866
|
|
|
@@ -847,6 +962,24 @@ function parseDigest(lexer: Lexer): Result<Pattern> {
|
|
|
847
962
|
return ok(digestPrefix(digestToken.token.value.value));
|
|
848
963
|
}
|
|
849
964
|
|
|
965
|
+
// Accept raw hex string identifiers: digest(a1b2c3)
|
|
966
|
+
if (digestToken.token.type === "Identifier") {
|
|
967
|
+
const hexStr = digestToken.token.value;
|
|
968
|
+
// Validate hex string: must be even length and all hex digits
|
|
969
|
+
if (hexStr.length === 0 || hexStr.length % 2 !== 0 || !/^[0-9a-fA-F]+$/.test(hexStr)) {
|
|
970
|
+
return err({ type: "InvalidHexString", span: digestToken.span });
|
|
971
|
+
}
|
|
972
|
+
const bytes = new Uint8Array(hexStr.length / 2);
|
|
973
|
+
for (let i = 0; i < hexStr.length; i += 2) {
|
|
974
|
+
bytes[i / 2] = Number.parseInt(hexStr.slice(i, i + 2), 16);
|
|
975
|
+
}
|
|
976
|
+
const close = lexer.next();
|
|
977
|
+
if (close?.token.type !== "ParenClose") {
|
|
978
|
+
return err({ type: "ExpectedCloseParen", span: lexer.span() });
|
|
979
|
+
}
|
|
980
|
+
return ok(digestPrefix(bytes));
|
|
981
|
+
}
|
|
982
|
+
|
|
850
983
|
return err(unexpectedToken(digestToken.token, digestToken.span));
|
|
851
984
|
}
|
|
852
985
|
|
package/src/parse/token.ts
CHANGED
|
@@ -99,7 +99,8 @@ export type Token =
|
|
|
99
99
|
| { readonly type: "DatePattern"; readonly value: Result<string> }
|
|
100
100
|
| { readonly type: "Range"; readonly value: Result<Quantifier> }
|
|
101
101
|
| { readonly type: "SingleQuotedPattern"; readonly value: Result<string> }
|
|
102
|
-
| { readonly type: "SingleQuotedRegex"; readonly value: Result<string> }
|
|
102
|
+
| { readonly type: "SingleQuotedRegex"; readonly value: Result<string> }
|
|
103
|
+
| { readonly type: "Identifier"; readonly value: string };
|
|
103
104
|
|
|
104
105
|
/**
|
|
105
106
|
* Keyword to token type mapping.
|
|
@@ -183,31 +184,31 @@ function isHexDigit(ch: string): boolean {
|
|
|
183
184
|
* Lexer for Gordian Envelope pattern syntax.
|
|
184
185
|
*/
|
|
185
186
|
export class Lexer {
|
|
186
|
-
readonly
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
187
|
+
private readonly _source: string;
|
|
188
|
+
private _position = 0;
|
|
189
|
+
private _tokenStart = 0;
|
|
190
|
+
private _peekedToken: { token: Token; span: Span } | undefined = undefined;
|
|
190
191
|
|
|
191
192
|
constructor(source: string) {
|
|
192
|
-
this
|
|
193
|
+
this._source = source;
|
|
193
194
|
}
|
|
194
195
|
|
|
195
196
|
/**
|
|
196
197
|
* Gets the current position in the source.
|
|
197
198
|
*/
|
|
198
199
|
get position(): number {
|
|
199
|
-
return this
|
|
200
|
+
return this._position;
|
|
200
201
|
}
|
|
201
202
|
|
|
202
203
|
/**
|
|
203
204
|
* Peeks at the next token without consuming it.
|
|
204
205
|
*/
|
|
205
206
|
peekToken(): { token: Token; span: Span } | undefined {
|
|
206
|
-
if (this
|
|
207
|
-
return this
|
|
207
|
+
if (this._peekedToken !== undefined) {
|
|
208
|
+
return this._peekedToken;
|
|
208
209
|
}
|
|
209
210
|
const result = this.next();
|
|
210
|
-
this
|
|
211
|
+
this._peekedToken = result;
|
|
211
212
|
return result;
|
|
212
213
|
}
|
|
213
214
|
|
|
@@ -215,51 +216,51 @@ export class Lexer {
|
|
|
215
216
|
* Gets the current span (from token start to current position).
|
|
216
217
|
*/
|
|
217
218
|
span(): Span {
|
|
218
|
-
return { start: this
|
|
219
|
+
return { start: this._tokenStart, end: this._position };
|
|
219
220
|
}
|
|
220
221
|
|
|
221
222
|
/**
|
|
222
223
|
* Gets the remaining source string.
|
|
223
224
|
*/
|
|
224
225
|
remainder(): string {
|
|
225
|
-
return this
|
|
226
|
+
return this._source.slice(this._position);
|
|
226
227
|
}
|
|
227
228
|
|
|
228
229
|
/**
|
|
229
230
|
* Peeks at the current character without consuming it.
|
|
230
231
|
*/
|
|
231
232
|
peek(): string | undefined {
|
|
232
|
-
if (this
|
|
233
|
+
if (this._position >= this._source.length) {
|
|
233
234
|
return undefined;
|
|
234
235
|
}
|
|
235
|
-
return this
|
|
236
|
+
return this._source[this._position];
|
|
236
237
|
}
|
|
237
238
|
|
|
238
239
|
/**
|
|
239
240
|
* Peeks at the next character without consuming current.
|
|
240
241
|
*/
|
|
241
242
|
peekNext(): string | undefined {
|
|
242
|
-
if (this
|
|
243
|
+
if (this._position + 1 >= this._source.length) {
|
|
243
244
|
return undefined;
|
|
244
245
|
}
|
|
245
|
-
return this
|
|
246
|
+
return this._source[this._position + 1];
|
|
246
247
|
}
|
|
247
248
|
|
|
248
249
|
/**
|
|
249
250
|
* Advances the position by n characters.
|
|
250
251
|
*/
|
|
251
252
|
bump(n = 1): void {
|
|
252
|
-
this
|
|
253
|
+
this._position = Math.min(this._position + n, this._source.length);
|
|
253
254
|
}
|
|
254
255
|
|
|
255
256
|
/**
|
|
256
257
|
* Skips whitespace.
|
|
257
258
|
*/
|
|
258
|
-
|
|
259
|
-
while (this
|
|
260
|
-
const ch = this
|
|
259
|
+
private _skipWhitespace(): void {
|
|
260
|
+
while (this._position < this._source.length) {
|
|
261
|
+
const ch = this._source[this._position];
|
|
261
262
|
if (ch !== undefined && isWhitespace(ch)) {
|
|
262
|
-
this
|
|
263
|
+
this._position++;
|
|
263
264
|
} else {
|
|
264
265
|
break;
|
|
265
266
|
}
|
|
@@ -269,7 +270,7 @@ export class Lexer {
|
|
|
269
270
|
/**
|
|
270
271
|
* Parses a string literal (after the opening quote).
|
|
271
272
|
*/
|
|
272
|
-
|
|
273
|
+
private _parseStringLiteral(): Result<string> {
|
|
273
274
|
const src = this.remainder();
|
|
274
275
|
let escape = false;
|
|
275
276
|
let content = "";
|
|
@@ -323,7 +324,7 @@ export class Lexer {
|
|
|
323
324
|
/**
|
|
324
325
|
* Parses a regex pattern (after the opening slash).
|
|
325
326
|
*/
|
|
326
|
-
|
|
327
|
+
private _parseRegex(): Result<string> {
|
|
327
328
|
const src = this.remainder();
|
|
328
329
|
let escape = false;
|
|
329
330
|
|
|
@@ -358,7 +359,7 @@ export class Lexer {
|
|
|
358
359
|
/**
|
|
359
360
|
* Parses a hex pattern (after h').
|
|
360
361
|
*/
|
|
361
|
-
|
|
362
|
+
private _parseHexPattern(): Result<Uint8Array> {
|
|
362
363
|
const src = this.remainder();
|
|
363
364
|
|
|
364
365
|
for (let i = 0; i < src.length; i++) {
|
|
@@ -397,7 +398,7 @@ export class Lexer {
|
|
|
397
398
|
/**
|
|
398
399
|
* Parses a hex binary regex (after h'/).
|
|
399
400
|
*/
|
|
400
|
-
|
|
401
|
+
private _parseHexBinaryRegex(): Result<string> {
|
|
401
402
|
const src = this.remainder();
|
|
402
403
|
let escape = false;
|
|
403
404
|
|
|
@@ -436,7 +437,7 @@ export class Lexer {
|
|
|
436
437
|
/**
|
|
437
438
|
* Parses a date pattern (after date').
|
|
438
439
|
*/
|
|
439
|
-
|
|
440
|
+
private _parseDatePattern(): Result<string> {
|
|
440
441
|
const src = this.remainder();
|
|
441
442
|
|
|
442
443
|
for (let i = 0; i < src.length; i++) {
|
|
@@ -455,7 +456,7 @@ export class Lexer {
|
|
|
455
456
|
/**
|
|
456
457
|
* Parses a range pattern (after {).
|
|
457
458
|
*/
|
|
458
|
-
|
|
459
|
+
private _parseRange(): Result<Quantifier> {
|
|
459
460
|
const src = this.remainder();
|
|
460
461
|
let pos = 0;
|
|
461
462
|
|
|
@@ -554,7 +555,7 @@ export class Lexer {
|
|
|
554
555
|
/**
|
|
555
556
|
* Parses a single quoted pattern (after ').
|
|
556
557
|
*/
|
|
557
|
-
|
|
558
|
+
private _parseSingleQuotedPattern(): Result<string> {
|
|
558
559
|
const src = this.remainder();
|
|
559
560
|
|
|
560
561
|
for (let i = 0; i < src.length; i++) {
|
|
@@ -573,7 +574,7 @@ export class Lexer {
|
|
|
573
574
|
/**
|
|
574
575
|
* Parses a single quoted regex (after '/).
|
|
575
576
|
*/
|
|
576
|
-
|
|
577
|
+
private _parseSingleQuotedRegex(): Result<string> {
|
|
577
578
|
const src = this.remainder();
|
|
578
579
|
let escape = false;
|
|
579
580
|
|
|
@@ -612,8 +613,8 @@ export class Lexer {
|
|
|
612
613
|
/**
|
|
613
614
|
* Parses a number (integer or float).
|
|
614
615
|
*/
|
|
615
|
-
|
|
616
|
-
const startPos = this
|
|
616
|
+
private _parseNumber(): Token {
|
|
617
|
+
const startPos = this._position;
|
|
617
618
|
let isFloat = false;
|
|
618
619
|
let isNegative = false;
|
|
619
620
|
|
|
@@ -662,7 +663,7 @@ export class Lexer {
|
|
|
662
663
|
}
|
|
663
664
|
}
|
|
664
665
|
|
|
665
|
-
const numStr = this
|
|
666
|
+
const numStr = this._source.slice(startPos, this._position);
|
|
666
667
|
|
|
667
668
|
if (isFloat) {
|
|
668
669
|
const value = parseFloat(numStr);
|
|
@@ -688,25 +689,25 @@ export class Lexer {
|
|
|
688
689
|
*/
|
|
689
690
|
next(): { token: Token; span: Span } | undefined {
|
|
690
691
|
// Return peeked token if available
|
|
691
|
-
if (this
|
|
692
|
-
const peeked = this
|
|
693
|
-
this
|
|
692
|
+
if (this._peekedToken !== undefined) {
|
|
693
|
+
const peeked = this._peekedToken;
|
|
694
|
+
this._peekedToken = undefined;
|
|
694
695
|
return peeked;
|
|
695
696
|
}
|
|
696
697
|
|
|
697
|
-
this
|
|
698
|
-
this
|
|
698
|
+
this._skipWhitespace();
|
|
699
|
+
this._tokenStart = this._position;
|
|
699
700
|
|
|
700
|
-
if (this
|
|
701
|
+
if (this._position >= this._source.length) {
|
|
701
702
|
return undefined;
|
|
702
703
|
}
|
|
703
704
|
|
|
704
|
-
const ch = this
|
|
705
|
+
const ch = this._source[this._position];
|
|
705
706
|
if (ch === undefined) return undefined;
|
|
706
707
|
|
|
707
708
|
// Check for two-character operators first
|
|
708
|
-
const twoChar = this
|
|
709
|
-
const threeChar = this
|
|
709
|
+
const twoChar = this._source.slice(this._position, this._position + 2);
|
|
710
|
+
const threeChar = this._source.slice(this._position, this._position + 3);
|
|
710
711
|
|
|
711
712
|
// Check for ... (ellipsis)
|
|
712
713
|
if (threeChar === "...") {
|
|
@@ -715,7 +716,7 @@ export class Lexer {
|
|
|
715
716
|
}
|
|
716
717
|
|
|
717
718
|
// Check for -Infinity
|
|
718
|
-
if (this
|
|
719
|
+
if (this._source.slice(this._position, this._position + 9) === "-Infinity") {
|
|
719
720
|
this.bump(9);
|
|
720
721
|
return { token: { type: "NegativeInfinity" }, span: this.span() };
|
|
721
722
|
}
|
|
@@ -755,16 +756,16 @@ export class Lexer {
|
|
|
755
756
|
if (this.peek() === "/") {
|
|
756
757
|
this.bump(1);
|
|
757
758
|
return {
|
|
758
|
-
token: { type: "HexBinaryRegex", value: this
|
|
759
|
+
token: { type: "HexBinaryRegex", value: this._parseHexBinaryRegex() },
|
|
759
760
|
span: this.span(),
|
|
760
761
|
};
|
|
761
762
|
}
|
|
762
|
-
return { token: { type: "HexPattern", value: this
|
|
763
|
+
return { token: { type: "HexPattern", value: this._parseHexPattern() }, span: this.span() };
|
|
763
764
|
}
|
|
764
765
|
case "'/":
|
|
765
766
|
this.bump(2);
|
|
766
767
|
return {
|
|
767
|
-
token: { type: "SingleQuotedRegex", value: this
|
|
768
|
+
token: { type: "SingleQuotedRegex", value: this._parseSingleQuotedRegex() },
|
|
768
769
|
span: this.span(),
|
|
769
770
|
};
|
|
770
771
|
}
|
|
@@ -813,25 +814,25 @@ export class Lexer {
|
|
|
813
814
|
case '"':
|
|
814
815
|
this.bump(1);
|
|
815
816
|
return {
|
|
816
|
-
token: { type: "StringLiteral", value: this
|
|
817
|
+
token: { type: "StringLiteral", value: this._parseStringLiteral() },
|
|
817
818
|
span: this.span(),
|
|
818
819
|
};
|
|
819
820
|
case "/":
|
|
820
821
|
this.bump(1);
|
|
821
|
-
return { token: { type: "Regex", value: this
|
|
822
|
+
return { token: { type: "Regex", value: this._parseRegex() }, span: this.span() };
|
|
822
823
|
case "{":
|
|
823
824
|
this.bump(1);
|
|
824
|
-
return { token: { type: "Range", value: this
|
|
825
|
+
return { token: { type: "Range", value: this._parseRange() }, span: this.span() };
|
|
825
826
|
case "'":
|
|
826
827
|
this.bump(1);
|
|
827
828
|
return {
|
|
828
|
-
token: { type: "SingleQuotedPattern", value: this
|
|
829
|
+
token: { type: "SingleQuotedPattern", value: this._parseSingleQuotedPattern() },
|
|
829
830
|
span: this.span(),
|
|
830
831
|
};
|
|
831
832
|
case "@": {
|
|
832
833
|
// Group name
|
|
833
834
|
this.bump(1);
|
|
834
|
-
const start = this
|
|
835
|
+
const start = this._position;
|
|
835
836
|
let gc = this.peek();
|
|
836
837
|
if (gc !== undefined && isIdentStart(gc)) {
|
|
837
838
|
gc = this.peek();
|
|
@@ -839,7 +840,7 @@ export class Lexer {
|
|
|
839
840
|
this.bump(1);
|
|
840
841
|
gc = this.peek();
|
|
841
842
|
}
|
|
842
|
-
const name = this
|
|
843
|
+
const name = this._source.slice(start, this._position);
|
|
843
844
|
return { token: { type: "GroupName", name }, span: this.span() };
|
|
844
845
|
}
|
|
845
846
|
// Invalid group name, return as error token
|
|
@@ -848,26 +849,26 @@ export class Lexer {
|
|
|
848
849
|
}
|
|
849
850
|
|
|
850
851
|
// Check for date' pattern
|
|
851
|
-
if (this
|
|
852
|
+
if (this._source.slice(this._position, this._position + 5) === "date'") {
|
|
852
853
|
this.bump(5);
|
|
853
|
-
return { token: { type: "DatePattern", value: this
|
|
854
|
+
return { token: { type: "DatePattern", value: this._parseDatePattern() }, span: this.span() };
|
|
854
855
|
}
|
|
855
856
|
|
|
856
857
|
// Check for number (including negative)
|
|
857
858
|
const nextChar = this.peekNext();
|
|
858
859
|
if (isDigit(ch) || (ch === "-" && nextChar !== undefined && isDigit(nextChar))) {
|
|
859
|
-
return { token: this
|
|
860
|
+
return { token: this._parseNumber(), span: this.span() };
|
|
860
861
|
}
|
|
861
862
|
|
|
862
863
|
// Check for identifier/keyword
|
|
863
864
|
if (isIdentStart(ch)) {
|
|
864
|
-
const start = this
|
|
865
|
+
const start = this._position;
|
|
865
866
|
let ic = this.peek();
|
|
866
867
|
while (ic !== undefined && isIdentContinue(ic)) {
|
|
867
868
|
this.bump(1);
|
|
868
869
|
ic = this.peek();
|
|
869
870
|
}
|
|
870
|
-
const ident = this
|
|
871
|
+
const ident = this._source.slice(start, this._position);
|
|
871
872
|
|
|
872
873
|
// Check for keywords
|
|
873
874
|
const keyword = KEYWORDS.get(ident);
|
|
@@ -875,8 +876,8 @@ export class Lexer {
|
|
|
875
876
|
return { token: keyword, span: this.span() };
|
|
876
877
|
}
|
|
877
878
|
|
|
878
|
-
// Unknown identifier -
|
|
879
|
-
return
|
|
879
|
+
// Unknown identifier - return as Identifier token
|
|
880
|
+
return { token: { type: "Identifier", value: ident }, span: this.span() };
|
|
880
881
|
}
|
|
881
882
|
|
|
882
883
|
// Unknown character
|