@bcts/dcbor-pattern 1.0.0-alpha.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +48 -0
- package/README.md +14 -0
- package/dist/index.cjs +6561 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +2732 -0
- package/dist/index.d.cts.map +1 -0
- package/dist/index.d.mts +2732 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.iife.js +6562 -0
- package/dist/index.iife.js.map +1 -0
- package/dist/index.mjs +6244 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +85 -0
- package/src/error.ts +333 -0
- package/src/format.ts +299 -0
- package/src/index.ts +20 -0
- package/src/interval.ts +230 -0
- package/src/parse/index.ts +95 -0
- package/src/parse/meta/and-parser.ts +47 -0
- package/src/parse/meta/capture-parser.ts +56 -0
- package/src/parse/meta/index.ts +13 -0
- package/src/parse/meta/not-parser.ts +28 -0
- package/src/parse/meta/or-parser.ts +47 -0
- package/src/parse/meta/primary-parser.ts +420 -0
- package/src/parse/meta/repeat-parser.ts +133 -0
- package/src/parse/meta/search-parser.ts +56 -0
- package/src/parse/parse-registry.ts +31 -0
- package/src/parse/structure/array-parser.ts +210 -0
- package/src/parse/structure/index.ts +9 -0
- package/src/parse/structure/map-parser.ts +128 -0
- package/src/parse/structure/tagged-parser.ts +269 -0
- package/src/parse/token.ts +997 -0
- package/src/parse/value/bool-parser.ts +33 -0
- package/src/parse/value/bytestring-parser.ts +42 -0
- package/src/parse/value/date-parser.ts +24 -0
- package/src/parse/value/digest-parser.ts +24 -0
- package/src/parse/value/index.ts +14 -0
- package/src/parse/value/known-value-parser.ts +24 -0
- package/src/parse/value/null-parser.ts +19 -0
- package/src/parse/value/number-parser.ts +19 -0
- package/src/parse/value/text-parser.ts +43 -0
- package/src/pattern/index.ts +740 -0
- package/src/pattern/match-registry.ts +137 -0
- package/src/pattern/matcher.ts +388 -0
- package/src/pattern/meta/and-pattern.ts +56 -0
- package/src/pattern/meta/any-pattern.ts +43 -0
- package/src/pattern/meta/capture-pattern.ts +57 -0
- package/src/pattern/meta/index.ts +168 -0
- package/src/pattern/meta/not-pattern.ts +70 -0
- package/src/pattern/meta/or-pattern.ts +56 -0
- package/src/pattern/meta/repeat-pattern.ts +117 -0
- package/src/pattern/meta/search-pattern.ts +298 -0
- package/src/pattern/meta/sequence-pattern.ts +72 -0
- package/src/pattern/structure/array-pattern/assigner.ts +95 -0
- package/src/pattern/structure/array-pattern/backtrack.ts +240 -0
- package/src/pattern/structure/array-pattern/helpers.ts +140 -0
- package/src/pattern/structure/array-pattern/index.ts +502 -0
- package/src/pattern/structure/index.ts +122 -0
- package/src/pattern/structure/map-pattern.ts +255 -0
- package/src/pattern/structure/tagged-pattern.ts +190 -0
- package/src/pattern/value/bool-pattern.ts +67 -0
- package/src/pattern/value/bytes-utils.ts +48 -0
- package/src/pattern/value/bytestring-pattern.ts +111 -0
- package/src/pattern/value/date-pattern.ts +162 -0
- package/src/pattern/value/digest-pattern.ts +136 -0
- package/src/pattern/value/index.ts +168 -0
- package/src/pattern/value/known-value-pattern.ts +123 -0
- package/src/pattern/value/null-pattern.ts +46 -0
- package/src/pattern/value/number-pattern.ts +181 -0
- package/src/pattern/value/text-pattern.ts +82 -0
- package/src/pattern/vm.ts +619 -0
- package/src/quantifier.ts +185 -0
- package/src/reluctance.ts +65 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Capture pattern parser.
|
|
3
|
+
*
|
|
4
|
+
* @module parse/meta/capture-parser
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { Lexer } from "../token";
|
|
8
|
+
import type { Pattern } from "../../pattern";
|
|
9
|
+
import type { Result } from "../../error";
|
|
10
|
+
import { Ok, Err } from "../../error";
|
|
11
|
+
import { capture } from "../../pattern";
|
|
12
|
+
import { parseOr } from "./or-parser";
|
|
13
|
+
|
|
14
|
+
/**
 * Parses the parenthesised body of a capture group: `(pattern)` in
 * `@name(pattern)`.
 *
 * The next token read from `lexer` must be an opening parenthesis; the
 * capture name itself is supplied by the caller.
 *
 * @param lexer - Token source to read the group body from.
 * @param name - Capture name to attach to the parsed inner pattern.
 * @returns The capture pattern on success. Otherwise `UnexpectedEndOfInput`
 *   or `UnexpectedToken` when the opening parenthesis is missing,
 *   `ExpectedCloseParen` when the group is not closed, or whatever error the
 *   lexer or the inner pattern parser produced.
 */
export const parseCapture = (lexer: Lexer, name: string): Result<Pattern> => {
  // "(" must come first.
  const opening = lexer.next();
  if (opening === undefined) {
    return Err({ type: "UnexpectedEndOfInput" });
  }
  if (!opening.ok) {
    return opening;
  }
  const { token: openToken, span: openSpan } = opening.value;
  if (openToken.type !== "ParenOpen") {
    return Err({
      type: "UnexpectedToken",
      token: openToken,
      span: openSpan,
    });
  }

  // The body is a full pattern, so parsing restarts at the OR level.
  const body = parseOr(lexer);
  if (!body.ok) {
    return body;
  }

  // ")" must terminate the group.
  const closing = lexer.next();
  if (closing === undefined) {
    return Err({ type: "ExpectedCloseParen", span: lexer.span() });
  }
  if (!closing.ok) {
    return closing;
  }
  const { token: closeToken, span: closeSpan } = closing.value;
  if (closeToken.type === "ParenClose") {
    return Ok(capture(name, body.value));
  }
  return Err({
    type: "ExpectedCloseParen",
    span: closeSpan,
  });
};
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Meta parsers for dCBOR patterns.
|
|
3
|
+
*
|
|
4
|
+
* @module parse/meta
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
export * from "./or-parser";
|
|
8
|
+
export * from "./and-parser";
|
|
9
|
+
export * from "./not-parser";
|
|
10
|
+
export * from "./repeat-parser";
|
|
11
|
+
export * from "./primary-parser";
|
|
12
|
+
export * from "./capture-parser";
|
|
13
|
+
export * from "./search-parser";
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NOT pattern parser.
|
|
3
|
+
*
|
|
4
|
+
* @module parse/meta/not-parser
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { Lexer } from "../token";
|
|
8
|
+
import type { Pattern } from "../../pattern";
|
|
9
|
+
import type { Result } from "../../error";
|
|
10
|
+
import { Ok } from "../../error";
|
|
11
|
+
import { not } from "../../pattern";
|
|
12
|
+
import { parsePrimary } from "./primary-parser";
|
|
13
|
+
|
|
14
|
+
/**
 * Parses an optionally negated pattern.
 *
 * When the upcoming token is a NOT operator it is consumed and the operand is
 * parsed by recursing into this function, so stacked negations nest to the
 * right. Any other lookahead (including end of input or a lexer error) is
 * handed to the primary parser untouched.
 *
 * @param lexer - Token source to read from.
 * @returns The negated pattern, the primary pattern, or the first error hit.
 */
export const parseNot = (lexer: Lexer): Result<Pattern> => {
  const upcoming = lexer.peekToken();
  const startsWithNot = upcoming?.ok === true && upcoming.value.type === "Not";

  if (!startsWithNot) {
    return parsePrimary(lexer);
  }

  // Drop the NOT token, then parse whatever it negates.
  lexer.next();
  const operand = parseNot(lexer);
  return operand.ok ? Ok(not(operand.value)) : operand;
};
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OR pattern parser - the top-level pattern parser.
|
|
3
|
+
*
|
|
4
|
+
* @module parse/meta/or-parser
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { Lexer } from "../token";
|
|
8
|
+
import type { Pattern } from "../../pattern";
|
|
9
|
+
import type { Result } from "../../error";
|
|
10
|
+
import { Ok } from "../../error";
|
|
11
|
+
import { or } from "../../pattern";
|
|
12
|
+
import { parseAnd } from "./and-parser";
|
|
13
|
+
|
|
14
|
+
/**
 * Parses a chain of AND-level patterns separated by OR tokens. This is the
 * entry point for parsing a complete pattern expression.
 *
 * @param lexer - Token source to read from.
 * @returns The sole operand unchanged when no OR token follows it, an OR
 *   pattern over all operands otherwise, or the first operand error.
 */
export const parseOr = (lexer: Lexer): Result<Pattern> => {
  const head = parseAnd(lexer);
  if (!head.ok) {
    return head;
  }

  const alternatives: Pattern[] = [head.value];

  for (;;) {
    const lookahead = lexer.peekToken();
    const sawOr = lookahead?.ok === true && lookahead.value.type === "Or";
    if (!sawOr) {
      // End of input, a lexer error, or any other token ends the chain.
      break;
    }
    lexer.next(); // discard the OR token

    const operand = parseAnd(lexer);
    if (!operand.ok) {
      return operand;
    }
    alternatives.push(operand.value);
  }

  // A lone operand is returned as-is rather than wrapped in an OR.
  if (alternatives.length === 1) {
    return Ok(head.value);
  }
  return Ok(or(...alternatives));
};
|
|
@@ -0,0 +1,420 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Primary pattern parser - handles atomic patterns.
|
|
3
|
+
*
|
|
4
|
+
* @module parse/meta/primary-parser
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { CborDate } from "@bcts/dcbor";
|
|
8
|
+
import { Digest } from "@bcts/components";
|
|
9
|
+
import type { Lexer } from "../token";
|
|
10
|
+
import type { Pattern } from "../../pattern";
|
|
11
|
+
import type { Result } from "../../error";
|
|
12
|
+
import { Ok, Err } from "../../error";
|
|
13
|
+
import { any, anyArray, anyMap, text, textRegex, number, numberRange } from "../../pattern";
|
|
14
|
+
import { parseBool, parseBoolTrue, parseBoolFalse } from "../value/bool-parser";
|
|
15
|
+
import { parseNull } from "../value/null-parser";
|
|
16
|
+
import { parseNumber } from "../value/number-parser";
|
|
17
|
+
import { parseText } from "../value/text-parser";
|
|
18
|
+
import {
|
|
19
|
+
parseByteString,
|
|
20
|
+
parseHexStringToken,
|
|
21
|
+
parseHexRegexToken,
|
|
22
|
+
} from "../value/bytestring-parser";
|
|
23
|
+
import { parseDate } from "../value/date-parser";
|
|
24
|
+
import { parseDigest } from "../value/digest-parser";
|
|
25
|
+
import { parseKnownValue } from "../value/known-value-parser";
|
|
26
|
+
import { parseTagged } from "../structure/tagged-parser";
|
|
27
|
+
import { parseBracketArray } from "../structure/array-parser";
|
|
28
|
+
import { parseBracketMap } from "../structure/map-parser";
|
|
29
|
+
import { parseCapture } from "./capture-parser";
|
|
30
|
+
import { parseSearch } from "./search-parser";
|
|
31
|
+
import { parseQuantifier } from "./repeat-parser";
|
|
32
|
+
import { parseOr } from "./or-parser";
|
|
33
|
+
import { mapPatternWithLengthInterval } from "../../pattern/structure/map-pattern";
|
|
34
|
+
import {
|
|
35
|
+
numberPatternNaN,
|
|
36
|
+
numberPatternInfinity,
|
|
37
|
+
numberPatternNegInfinity,
|
|
38
|
+
numberPatternGreaterThanOrEqual,
|
|
39
|
+
numberPatternLessThanOrEqual,
|
|
40
|
+
numberPatternGreaterThan,
|
|
41
|
+
numberPatternLessThan,
|
|
42
|
+
} from "../../pattern/value/number-pattern";
|
|
43
|
+
import { KnownValue } from "@bcts/known-values";
|
|
44
|
+
import {
|
|
45
|
+
knownValuePatternValue,
|
|
46
|
+
knownValuePatternNamed,
|
|
47
|
+
knownValuePatternRegex,
|
|
48
|
+
} from "../../pattern/value/known-value-pattern";
|
|
49
|
+
|
|
50
|
+
/**
 * Reads the next token and requires it to be a number literal.
 *
 * Shared by the range form (number, ellipsis, number) and by the comparison
 * operators, all of which need a numeric operand right after their operator.
 *
 * @param lexer - Token source positioned at the expected operand.
 * @returns The literal's numeric value; `UnexpectedEndOfInput` when there is
 *   no further token, the lexer's own error when tokenization fails, or
 *   `UnexpectedToken` when the token is not a number literal.
 */
const expectNumberLiteral = (lexer: Lexer): Result<number> => {
  const next = lexer.next();
  if (next === undefined) {
    return Err({ type: "UnexpectedEndOfInput" });
  }
  if (!next.ok) {
    return next;
  }
  const { token, span } = next.value;
  if (token.type !== "NumberLiteral") {
    return Err({
      type: "UnexpectedToken",
      token,
      span,
    });
  }
  return Ok(token.value);
};

/**
 * Parses the operand of a numeric comparison operator and wraps the resulting
 * number pattern as a value pattern.
 *
 * @param lexer - Token source positioned just after the comparison operator.
 * @param makePattern - Builds the number pattern from the parsed bound.
 * @returns The comparison pattern, or the operand error.
 */
const parseNumberComparison = (
  lexer: Lexer,
  makePattern: (bound: number) => ReturnType<typeof numberPatternGreaterThan>,
): Result<Pattern> => {
  const bound = expectNumberLiteral(lexer);
  if (!bound.ok) {
    return bound;
  }
  return Ok({
    kind: "Value",
    pattern: {
      type: "Number",
      pattern: makePattern(bound.value),
    },
  });
};

/**
 * Parse a primary pattern - the most basic unit of pattern matching.
 *
 * Consumes exactly one token and dispatches on its type: keyword tokens are
 * delegated to their dedicated parsers, literal tokens are converted to
 * patterns directly, and operator or closing tokens that cannot start a
 * pattern are rejected.
 *
 * @param lexer - Token source to read from.
 * @returns The parsed pattern; `UnexpectedEndOfInput` when no token is left,
 *   `UnexpectedToken` for tokens that cannot start a pattern, or the more
 *   specific error produced while handling the token.
 */
export const parsePrimary = (lexer: Lexer): Result<Pattern> => {
  const tokenResult = lexer.next();

  if (tokenResult === undefined) {
    return Err({ type: "UnexpectedEndOfInput" });
  }

  if (!tokenResult.ok) {
    return tokenResult;
  }

  const spanned = tokenResult.value;
  const token = spanned.token;

  switch (token.type) {
    // Meta patterns
    case "RepeatZeroOrMore":
      // '*' as standalone pattern means "any"
      return Ok(any());

    case "Search":
      return parseSearch(lexer);

    // Parenthesized groups
    case "ParenOpen": {
      const patternResult = parseOr(lexer);
      if (!patternResult.ok) {
        return patternResult;
      }

      const closeResult = lexer.next();
      if (closeResult === undefined) {
        return Err({ type: "ExpectedCloseParen", span: lexer.span() });
      }
      if (!closeResult.ok) {
        return closeResult;
      }
      if (closeResult.value.token.type !== "ParenClose") {
        return Err({
          type: "ExpectedCloseParen",
          span: closeResult.value.span,
        });
      }

      // After closing parenthesis, check for quantifiers
      return parseQuantifier(patternResult.value, lexer, true);
    }

    // Capture patterns (@name(...))
    case "GroupName":
      return parseCapture(lexer, token.name);

    // Value patterns
    case "Bool":
      return parseBool(lexer);
    case "BoolTrue":
      return parseBoolTrue(lexer);
    case "BoolFalse":
      return parseBoolFalse(lexer);
    case "ByteString":
      return parseByteString(lexer);
    case "Date":
      return parseDate(lexer);
    case "Digest":
      return parseDigest(lexer);
    case "DigestQuoted":
      try {
        const digest = Digest.fromHex(token.value);
        return Ok({
          kind: "Value",
          pattern: { type: "Digest", pattern: { variant: "Value", value: digest } },
        });
      } catch {
        return Err({
          type: "InvalidDigest",
          span: spanned.span,
        });
      }
    case "DateQuoted":
      try {
        const date = CborDate.fromString(token.value);
        return Ok({
          kind: "Value",
          pattern: { type: "Date", pattern: { variant: "Value", value: date } },
        });
      } catch {
        return Err({
          type: "InvalidDate",
          span: spanned.span,
        });
      }
    case "Known":
      return parseKnownValue(lexer);
    case "Null":
      return parseNull(lexer);
    case "Number":
      return parseNumber(lexer);
    case "Text":
      return parseText(lexer);

    // Direct string literal
    case "StringLiteral":
      return Ok(text(token.value));

    // Single-quoted pattern (non-prefixed known value)
    case "SingleQuoted":
      return parseSingleQuotedAsKnownValue(token.value);

    // Direct regex literal
    case "Regex":
      try {
        const regex = new RegExp(token.pattern);
        return Ok(textRegex(regex));
      } catch {
        return Err({
          type: "InvalidRegex",
          span: spanned.span,
        });
      }

    // Direct hex string literal
    case "HexString":
      return parseHexStringToken(Ok(token.value));

    // Direct hex regex literal
    case "HexRegex":
      try {
        const regex = new RegExp(token.pattern);
        return parseHexRegexToken(Ok(regex));
      } catch {
        return Err({
          type: "InvalidRegex",
          span: spanned.span,
        });
      }

    // Structure patterns
    case "Tagged":
      return parseTagged(lexer);

    case "Array":
      return Ok(anyArray());

    case "Map":
      return Ok(anyMap());

    // Bracket syntax for arrays
    case "BracketOpen":
      return parseBracketArray(lexer);

    // Brace syntax for maps
    case "BraceOpen":
      return parseBracketMap(lexer);

    // Range tokens that represent map length constraints
    case "Range":
      return Ok({
        kind: "Structure",
        pattern: {
          type: "Map",
          pattern: mapPatternWithLengthInterval(token.quantifier.interval()),
        },
      });

    // Number literal, optionally the start of a range
    case "NumberLiteral": {
      // Look ahead for range operator
      const peeked = lexer.peekToken();
      if (peeked !== undefined && peeked.ok && peeked.value.type === "Ellipsis") {
        lexer.next(); // consume the ellipsis
        const end = expectNumberLiteral(lexer);
        if (!end.ok) {
          return end;
        }
        return Ok(numberRange(token.value, end.value));
      }
      return Ok(number(token.value));
    }

    case "NaN":
      return Ok({
        kind: "Value",
        pattern: { type: "Number", pattern: numberPatternNaN() },
      });

    case "Infinity":
      return Ok({
        kind: "Value",
        pattern: { type: "Number", pattern: numberPatternInfinity() },
      });

    case "NegInfinity":
      return Ok({
        kind: "Value",
        pattern: { type: "Number", pattern: numberPatternNegInfinity() },
      });

    // Comparison operators: each must be followed by a number literal
    case "GreaterThanOrEqual":
      return parseNumberComparison(lexer, numberPatternGreaterThanOrEqual);

    case "LessThanOrEqual":
      return parseNumberComparison(lexer, numberPatternLessThanOrEqual);

    case "GreaterThan":
      return parseNumberComparison(lexer, numberPatternGreaterThan);

    case "LessThan":
      return parseNumberComparison(lexer, numberPatternLessThan);

    // Unexpected tokens - these token types are not valid as primary patterns
    case "And":
    case "Or":
    case "Not":
    case "RepeatZeroOrMoreLazy":
    case "RepeatZeroOrMorePossessive":
    case "RepeatOneOrMore":
    case "RepeatOneOrMoreLazy":
    case "RepeatOneOrMorePossessive":
    case "RepeatZeroOrOne":
    case "RepeatZeroOrOneLazy":
    case "RepeatZeroOrOnePossessive":
    case "ParenClose":
    case "BracketClose":
    case "BraceClose":
    case "Comma":
    case "Colon":
    case "Ellipsis":
      return Err({
        type: "UnexpectedToken",
        token,
        span: spanned.span,
      });
  }
};
|
|
381
|
+
|
|
382
|
+
/**
 * Largest identifier accepted as a numeric known value (2^64 - 1).
 *
 * NOTE(review): assumes known-value identifiers are unsigned 64-bit integers,
 * as in the Known Values specification - confirm against `KnownValue`.
 */
const KNOWN_VALUE_ID_MAX = BigInt("0xffffffffffffffff");

/**
 * Interprets `text` as a known-value identifier.
 *
 * Only canonical decimal is accepted: digits only, no sign, no leading zeros,
 * no whitespace. The text is converted with `BigInt` directly rather than via
 * `parseInt`, so identifiers above 2^53 keep their exact value instead of
 * being rounded or rejected by a floating-point round trip.
 *
 * @param text - Content of the single-quoted token.
 * @returns The identifier, or `undefined` when `text` is not a canonical
 *   decimal integer in the range 0 to `KNOWN_VALUE_ID_MAX`.
 */
const parseKnownValueId = (text: string): bigint | undefined => {
  if (!/^(?:0|[1-9][0-9]*)$/.test(text)) {
    return undefined;
  }
  const id = BigInt(text);
  return id <= KNOWN_VALUE_ID_MAX ? id : undefined;
};

/**
 * Parse a single-quoted pattern as a known value.
 *
 * This handles the non-prefixed single-quoted syntax:
 * - '/regex/' -> known value whose name matches the regex
 * - 'value'   -> known value by numeric ID (canonical decimal)
 * - 'name'    -> known value by name (everything else)
 *
 * @param value - Text between the single quotes.
 * @returns The known-value pattern, or `InvalidRegex` when the text between
 *   the slashes does not compile. The error span covers `value` itself
 *   (0 to `value.length`), not a position in the original input, because the
 *   token's span is not passed to this function.
 */
const parseSingleQuotedAsKnownValue = (value: string): Result<Pattern> => {
  // Check if it's a regex pattern (starts and ends with /, non-empty body)
  if (value.startsWith("/") && value.endsWith("/") && value.length > 2) {
    const regexStr = value.slice(1, -1);
    try {
      const regex = new RegExp(regexStr);
      return Ok({
        kind: "Value",
        pattern: { type: "KnownValue", pattern: knownValuePatternRegex(regex) },
      });
    } catch {
      return Err({ type: "InvalidRegex", span: { start: 0, end: value.length } });
    }
  }

  // Try to parse as numeric ID
  const id = parseKnownValueId(value);
  if (id !== undefined) {
    const knownValue = new KnownValue(id);
    return Ok({
      kind: "Value",
      pattern: { type: "KnownValue", pattern: knownValuePatternValue(knownValue) },
    });
  }

  // Otherwise treat as name
  return Ok({
    kind: "Value",
    pattern: { type: "KnownValue", pattern: knownValuePatternNamed(value) },
  });
};
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Repeat pattern parser (quantifiers).
|
|
3
|
+
*
|
|
4
|
+
* @module parse/meta/repeat-parser
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { Lexer } from "../token";
|
|
8
|
+
import type { Pattern } from "../../pattern";
|
|
9
|
+
import type { Result } from "../../error";
|
|
10
|
+
import { Ok } from "../../error";
|
|
11
|
+
import { Quantifier } from "../../quantifier";
|
|
12
|
+
import { Reluctance } from "../../reluctance";
|
|
13
|
+
import { repeatPattern } from "../../pattern/meta/repeat-pattern";
|
|
14
|
+
|
|
15
|
+
/**
 * Quantifier factories for the fixed repetition tokens, keyed by token type.
 * Token types absent from this table (other than "Range", which carries its
 * own quantifier) do not denote a quantifier.
 */
const FIXED_QUANTIFIERS: ReadonlyMap<string, () => Quantifier> = new Map([
  ["RepeatZeroOrMore", () => Quantifier.zeroOrMore()],
  ["RepeatZeroOrMoreLazy", () => Quantifier.zeroOrMore(Reluctance.Lazy)],
  ["RepeatZeroOrMorePossessive", () => Quantifier.zeroOrMore(Reluctance.Possessive)],
  ["RepeatOneOrMore", () => Quantifier.oneOrMore()],
  ["RepeatOneOrMoreLazy", () => Quantifier.oneOrMore(Reluctance.Lazy)],
  ["RepeatOneOrMorePossessive", () => Quantifier.oneOrMore(Reluctance.Possessive)],
  ["RepeatZeroOrOne", () => Quantifier.zeroOrOne()],
  ["RepeatZeroOrOneLazy", () => Quantifier.zeroOrOne(Reluctance.Lazy)],
  ["RepeatZeroOrOnePossessive", () => Quantifier.zeroOrOne(Reluctance.Possessive)],
]);

/**
 * Applies an optional trailing quantifier to an already parsed pattern.
 *
 * The next token is only peeked. It is consumed when it is a repetition
 * token or a range token; any other token, a lexer error, or end of input is
 * left in place for the caller.
 *
 * @param pattern - Pattern the quantifier applies to.
 * @param lexer - Token source positioned directly after `pattern`.
 * @param forceRepeat - When no quantifier follows: `true` wraps `pattern` in
 *   a repeat of exactly one, `false` returns `pattern` unchanged.
 * @returns Always a successful result holding the possibly wrapped pattern.
 */
export const parseQuantifier = (
  pattern: Pattern,
  lexer: Lexer,
  forceRepeat: boolean,
): Result<Pattern> => {
  const lookahead = lexer.peekToken();

  let makeQuantifier: (() => Quantifier) | undefined;
  if (lookahead?.ok === true) {
    const token = lookahead.value;
    makeQuantifier =
      token.type === "Range" ? () => token.quantifier : FIXED_QUANTIFIERS.get(token.type);
  }

  if (makeQuantifier === undefined) {
    // No quantifier found
    const unquantified = forceRepeat ? wrapInRepeat(pattern, Quantifier.exactly(1)) : pattern;
    return Ok(unquantified);
  }

  lexer.next(); // consume the quantifier token
  return Ok(wrapInRepeat(pattern, makeQuantifier()));
};
|
|
126
|
+
|
|
127
|
+
/**
 * Builds the meta-level "Repeat" pattern node that applies `quantifier` to
 * `pattern`.
 *
 * @param pattern - Pattern to be repeated.
 * @param quantifier - Repetition bounds and matching behaviour.
 * @returns The wrapping pattern node.
 */
const wrapInRepeat = (pattern: Pattern, quantifier: Quantifier): Pattern => {
  const repeated = repeatPattern(pattern, quantifier);
  return {
    kind: "Meta",
    pattern: { type: "Repeat", pattern: repeated },
  };
};
|