@bcts/envelope-pattern 1.0.0-alpha.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +48 -0
- package/README.md +13 -0
- package/dist/index.cjs +6781 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +2628 -0
- package/dist/index.d.cts.map +1 -0
- package/dist/index.d.mts +2628 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.iife.js +6781 -0
- package/dist/index.iife.js.map +1 -0
- package/dist/index.mjs +6545 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +77 -0
- package/src/error.ts +262 -0
- package/src/format.ts +375 -0
- package/src/index.ts +27 -0
- package/src/parse/index.ts +923 -0
- package/src/parse/token.ts +906 -0
- package/src/parse/utils.ts +339 -0
- package/src/pattern/index.ts +719 -0
- package/src/pattern/leaf/array-pattern.ts +273 -0
- package/src/pattern/leaf/bool-pattern.ts +140 -0
- package/src/pattern/leaf/byte-string-pattern.ts +172 -0
- package/src/pattern/leaf/cbor-pattern.ts +355 -0
- package/src/pattern/leaf/date-pattern.ts +178 -0
- package/src/pattern/leaf/index.ts +280 -0
- package/src/pattern/leaf/known-value-pattern.ts +192 -0
- package/src/pattern/leaf/map-pattern.ts +152 -0
- package/src/pattern/leaf/null-pattern.ts +110 -0
- package/src/pattern/leaf/number-pattern.ts +248 -0
- package/src/pattern/leaf/tagged-pattern.ts +228 -0
- package/src/pattern/leaf/text-pattern.ts +165 -0
- package/src/pattern/matcher.ts +88 -0
- package/src/pattern/meta/and-pattern.ts +109 -0
- package/src/pattern/meta/any-pattern.ts +81 -0
- package/src/pattern/meta/capture-pattern.ts +111 -0
- package/src/pattern/meta/group-pattern.ts +110 -0
- package/src/pattern/meta/index.ts +269 -0
- package/src/pattern/meta/not-pattern.ts +91 -0
- package/src/pattern/meta/or-pattern.ts +146 -0
- package/src/pattern/meta/search-pattern.ts +201 -0
- package/src/pattern/meta/traverse-pattern.ts +146 -0
- package/src/pattern/structure/assertions-pattern.ts +244 -0
- package/src/pattern/structure/digest-pattern.ts +225 -0
- package/src/pattern/structure/index.ts +272 -0
- package/src/pattern/structure/leaf-structure-pattern.ts +85 -0
- package/src/pattern/structure/node-pattern.ts +188 -0
- package/src/pattern/structure/object-pattern.ts +149 -0
- package/src/pattern/structure/obscured-pattern.ts +159 -0
- package/src/pattern/structure/predicate-pattern.ts +151 -0
- package/src/pattern/structure/subject-pattern.ts +152 -0
- package/src/pattern/structure/wrapped-pattern.ts +195 -0
- package/src/pattern/vm.ts +1021 -0
|
@@ -0,0 +1,906 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @bcts/envelope-pattern - Token types and Lexer
|
|
3
|
+
*
|
|
4
|
+
* This is a 1:1 TypeScript port of bc-envelope-pattern-rust token.rs
|
|
5
|
+
* Uses a manual lexer implementation instead of logos.
|
|
6
|
+
*
|
|
7
|
+
* @module envelope-pattern/parse/token
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { Quantifier, Reluctance } from "@bcts/dcbor-pattern";
|
|
11
|
+
import {
|
|
12
|
+
type Span,
|
|
13
|
+
type Result,
|
|
14
|
+
type EnvelopePatternError,
|
|
15
|
+
ok,
|
|
16
|
+
err,
|
|
17
|
+
invalidRegex,
|
|
18
|
+
unterminatedRegex,
|
|
19
|
+
invalidRange,
|
|
20
|
+
invalidHexString,
|
|
21
|
+
unexpectedEndOfInput,
|
|
22
|
+
invalidNumberFormat,
|
|
23
|
+
} from "../error";
|
|
24
|
+
|
|
25
|
+
/**
 * Token types for the Gordian Envelope pattern syntax.
 *
 * Corresponds to the Rust `Token` enum in token.rs
 *
 * Every variant is tagged by its `type` string. Variants that carry a payload
 * store it as a `Result`, so a malformed literal is reported inside the token
 * instead of being thrown by the lexer.
 */
export type Token =
  // Meta Pattern Operators
  | { readonly type: "And" } // `&`
  | { readonly type: "Or" } // `|`
  | { readonly type: "Not" } // `!`
  | { readonly type: "Traverse" } // `->`
  | { readonly type: "RepeatZeroOrMore" } // `*`
  | { readonly type: "RepeatZeroOrMoreLazy" } // `*?`
  | { readonly type: "RepeatZeroOrMorePossessive" } // `*+`
  | { readonly type: "RepeatOneOrMore" } // `+`
  | { readonly type: "RepeatOneOrMoreLazy" } // `+?`
  | { readonly type: "RepeatOneOrMorePossessive" } // `++`
  | { readonly type: "RepeatZeroOrOne" } // `?`
  | { readonly type: "RepeatZeroOrOneLazy" } // `??`
  | { readonly type: "RepeatZeroOrOnePossessive" } // `?+`
  // Structure Pattern Keywords
  | { readonly type: "Assertion" } // `assert`
  | { readonly type: "AssertionPred" } // `assertpred`
  | { readonly type: "AssertionObj" } // `assertobj`
  | { readonly type: "Digest" } // `digest`
  | { readonly type: "Node" } // `node`
  | { readonly type: "Obj" } // `obj`
  | { readonly type: "Obscured" } // `obscured`
  | { readonly type: "Elided" } // `elided`
  | { readonly type: "Encrypted" } // `encrypted`
  | { readonly type: "Compressed" } // `compressed`
  | { readonly type: "Pred" } // `pred`
  | { readonly type: "Subject" } // `subj`
  | { readonly type: "Wrapped" } // `wrapped`
  | { readonly type: "Unwrap" } // `unwrap`
  | { readonly type: "Search" } // `search`
  // Leaf Pattern Keywords
  | { readonly type: "ByteString" } // `bstr`
  | { readonly type: "Leaf" } // `leaf`
  | { readonly type: "Cbor" } // `cbor`
  | { readonly type: "DateKeyword" } // `date`
  | { readonly type: "Known" } // `known`
  | { readonly type: "Null" } // `null`
  | { readonly type: "NumberKeyword" } // `number`
  | { readonly type: "Tagged" } // `tagged`
  // Special literals
  | { readonly type: "BoolKeyword" } // `bool`
  | { readonly type: "BoolTrue" } // `true`
  | { readonly type: "BoolFalse" } // `false`
  | { readonly type: "TextKeyword" } // `text`
  | { readonly type: "NaN" } // `NaN`
  // `"..."`; the value is the text with escape sequences already decoded
  | { readonly type: "StringLiteral"; readonly value: Result<string> }
  // Grouping and Range delimiters
  | { readonly type: "ParenOpen" } // `(`
  | { readonly type: "ParenClose" } // `)`
  | { readonly type: "BracketOpen" } // `[`
  | { readonly type: "BracketClose" } // `]`
  | { readonly type: "Comma" } // `,`
  | { readonly type: "Ellipsis" } // `...`
  | { readonly type: "GreaterThanOrEqual" } // `>=`
  | { readonly type: "LessThanOrEqual" } // `<=`
  | { readonly type: "GreaterThan" } // `>`
  | { readonly type: "LessThan" } // `<`
  // Numbers
  // Integer: written with a leading `-` and no fraction or exponent
  | { readonly type: "Integer"; readonly value: Result<number> }
  // UnsignedInteger: digits only, no sign, fraction or exponent
  | { readonly type: "UnsignedInteger"; readonly value: Result<number> }
  // Float: any number written with a fraction and/or an exponent
  | { readonly type: "Float"; readonly value: Result<number> }
  | { readonly type: "Infinity" } // `Infinity`
  | { readonly type: "NegativeInfinity" } // `-Infinity`
  // Complex tokens
  // `@name`; `name` is the empty string when no identifier follows the `@`
  | { readonly type: "GroupName"; readonly name: string }
  // `/.../`; the value is the regex source between the slashes
  | { readonly type: "Regex"; readonly value: Result<string> }
  // `h'...'`; the value is the decoded bytes
  | { readonly type: "HexPattern"; readonly value: Result<Uint8Array> }
  // `h'/.../` with an optional closing `'`; the value is the regex source
  | { readonly type: "HexBinaryRegex"; readonly value: Result<string> }
  // `date'...'`; the value is the raw text between the quotes
  | { readonly type: "DatePattern"; readonly value: Result<string> }
  // `{n}`, `{n,}` or `{n,m}`, optionally followed by `?` or `+`
  | { readonly type: "Range"; readonly value: Result<Quantifier> }
  // `'...'`; the value is the raw text between the quotes
  | { readonly type: "SingleQuotedPattern"; readonly value: Result<string> }
  // `'/.../` with an optional closing `'`; the value is the regex source
  | { readonly type: "SingleQuotedRegex"; readonly value: Result<string> };
|
|
103
|
+
|
|
104
|
+
/**
 * Lookup table from source text to the fixed token it denotes.
 *
 * The lexer consults this table with the identifier text it has just scanned.
 * The punctuation entries (`&`, `|`, `!`, `-Infinity`) are recognised directly
 * by the lexer before any identifier lookup takes place.
 */
const KEYWORDS = new Map<string, Token>()
  // Meta Pattern Operators
  .set("&", { type: "And" })
  .set("|", { type: "Or" })
  .set("!", { type: "Not" })
  // Structure Pattern Keywords
  .set("assert", { type: "Assertion" })
  .set("assertpred", { type: "AssertionPred" })
  .set("assertobj", { type: "AssertionObj" })
  .set("digest", { type: "Digest" })
  .set("node", { type: "Node" })
  .set("obj", { type: "Obj" })
  .set("obscured", { type: "Obscured" })
  .set("elided", { type: "Elided" })
  .set("encrypted", { type: "Encrypted" })
  .set("compressed", { type: "Compressed" })
  .set("pred", { type: "Pred" })
  .set("subj", { type: "Subject" })
  .set("wrapped", { type: "Wrapped" })
  .set("unwrap", { type: "Unwrap" })
  .set("search", { type: "Search" })
  // Leaf Pattern Keywords
  .set("bstr", { type: "ByteString" })
  .set("leaf", { type: "Leaf" })
  .set("cbor", { type: "Cbor" })
  .set("date", { type: "DateKeyword" })
  .set("known", { type: "Known" })
  .set("null", { type: "Null" })
  .set("number", { type: "NumberKeyword" })
  .set("tagged", { type: "Tagged" })
  // Special literals
  .set("bool", { type: "BoolKeyword" })
  .set("true", { type: "BoolTrue" })
  .set("false", { type: "BoolFalse" })
  .set("text", { type: "TextKeyword" })
  .set("NaN", { type: "NaN" })
  .set("Infinity", { type: "Infinity" })
  .set("-Infinity", { type: "NegativeInfinity" });
|
|
146
|
+
|
|
147
|
+
/**
 * Reports whether `ch` is one of the whitespace characters the lexer skips:
 * space, tab, line feed, carriage return or form feed.
 */
function isWhitespace(ch: string): boolean {
  switch (ch) {
    case " ":
    case "\t":
    case "\n":
    case "\r":
    case "\f":
      return true;
    default:
      return false;
  }
}
|
|
153
|
+
|
|
154
|
+
/**
 * Reports whether `ch` may begin an identifier: an ASCII letter or `_`.
 */
function isIdentStart(ch: string): boolean {
  if (ch === "_") {
    return true;
  }
  const isLower = ch >= "a" && ch <= "z";
  const isUpper = ch >= "A" && ch <= "Z";
  return isLower || isUpper;
}
|
|
160
|
+
|
|
161
|
+
/**
 * Reports whether `ch` may appear after the first character of an
 * identifier: anything accepted by `isIdentStart`, or an ASCII digit.
 */
function isIdentContinue(ch: string): boolean {
  if (isIdentStart(ch)) {
    return true;
  }
  return !(ch < "0" || ch > "9");
}
|
|
167
|
+
|
|
168
|
+
/**
 * Reports whether `ch` is an ASCII decimal digit (`0` through `9`).
 */
function isDigit(ch: string): boolean {
  const outsideRange = ch < "0" || ch > "9";
  return !outsideRange;
}
|
|
174
|
+
|
|
175
|
+
/**
 * Reports whether `ch` is an ASCII hexadecimal digit, in either letter case.
 */
function isHexDigit(ch: string): boolean {
  const isDecimal = ch >= "0" && ch <= "9";
  const isLowerHex = ch >= "a" && ch <= "f";
  const isUpperHex = ch >= "A" && ch <= "F";
  return isDecimal || isLowerHex || isUpperHex;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Fixed two-character lexemes and the token each one produces.
 */
const TWO_CHAR_TOKENS: ReadonlyMap<string, Token> = new Map<string, Token>([
  ["->", { type: "Traverse" }],
  ["*?", { type: "RepeatZeroOrMoreLazy" }],
  ["*+", { type: "RepeatZeroOrMorePossessive" }],
  ["+?", { type: "RepeatOneOrMoreLazy" }],
  ["++", { type: "RepeatOneOrMorePossessive" }],
  ["??", { type: "RepeatZeroOrOneLazy" }],
  ["?+", { type: "RepeatZeroOrOnePossessive" }],
  [">=", { type: "GreaterThanOrEqual" }],
  ["<=", { type: "LessThanOrEqual" }],
]);

/**
 * Fixed single-character lexemes and the token each one produces.
 */
const ONE_CHAR_TOKENS: ReadonlyMap<string, Token> = new Map<string, Token>([
  ["&", { type: "And" }],
  ["|", { type: "Or" }],
  ["!", { type: "Not" }],
  ["*", { type: "RepeatZeroOrMore" }],
  ["+", { type: "RepeatOneOrMore" }],
  ["?", { type: "RepeatZeroOrOne" }],
  ["(", { type: "ParenOpen" }],
  [")", { type: "ParenClose" }],
  ["[", { type: "BracketOpen" }],
  ["]", { type: "BracketClose" }],
  [",", { type: "Comma" }],
  [">", { type: "GreaterThan" }],
  ["<", { type: "LessThan" }],
]);

/**
 * Matches the text that follows `{` in a range quantifier.
 *
 * Groups: 1 = minimum, 2 = the comma (when present), 3 = maximum (when
 * present), 4 = trailing `?` or `+` (when present). The whitespace class is
 * spelled out so that it accepts exactly the characters `isWhitespace` does.
 */
const RANGE_BODY =
  /^[ \t\n\r\f]*(\d+)[ \t\n\r\f]*(?:(,)[ \t\n\r\f]*(?:(\d+)[ \t\n\r\f]*)?)?\}([?+])?/;

/**
 * Lexer for Gordian Envelope pattern syntax.
 *
 * Splits a pattern string into `Token`s, one per call to `next()`.
 * Malformed literals (strings, regexes, hex, ranges, numbers) are reported
 * through the `Result` stored in the token. Input the lexer does not
 * recognise at all makes `next()` return `undefined`, which is also what it
 * returns at the end of the input.
 */
export class Lexer {
  readonly #source: string;
  #position = 0;
  #tokenStart = 0;
  #peekedToken: { token: Token; span: Span } | undefined = undefined;
  // Tracked separately from #peekedToken because `undefined` is itself a
  // result that `next()` can produce and that must be cached.
  #hasPeeked = false;

  constructor(source: string) {
    this.#source = source;
  }

  /**
   * Gets the current position in the source.
   */
  get position(): number {
    return this.#position;
  }

  /**
   * Peeks at the next token without consuming it.
   *
   * The result is cached until the following `next()` call, including an
   * `undefined` result, so peeking repeatedly never advances through the
   * source and `next()` always returns exactly what was peeked.
   */
  peekToken(): { token: Token; span: Span } | undefined {
    if (!this.#hasPeeked) {
      this.#peekedToken = this.next();
      this.#hasPeeked = true;
    }
    return this.#peekedToken;
  }

  /**
   * Gets the current span (from token start to current position).
   */
  span(): Span {
    return { start: this.#tokenStart, end: this.#position };
  }

  /**
   * Gets the part of the source that has not been consumed yet.
   */
  remainder(): string {
    return this.#source.slice(this.#position);
  }

  /**
   * Returns the character at the current position without consuming it, or
   * `undefined` at the end of the source.
   */
  peek(): string | undefined {
    return this.#source[this.#position];
  }

  /**
   * Returns the character after the current one without consuming anything,
   * or `undefined` when there is none.
   */
  peekNext(): string | undefined {
    return this.#source[this.#position + 1];
  }

  /**
   * Advances the position by `n` characters, stopping at the end of the source.
   */
  bump(n = 1): void {
    this.#position = Math.min(this.#position + n, this.#source.length);
  }

  /**
   * Advances past every consecutive character accepted by `accept`.
   */
  #skipWhile(accept: (ch: string) => boolean): void {
    let ch = this.peek();
    while (ch !== undefined && accept(ch)) {
      this.#position++;
      ch = this.peek();
    }
  }

  /**
   * Pairs `token` with the span of the source consumed so far for it.
   *
   * Callers pass the fully built token, so any parsing done while building
   * it has already advanced the position by the time the span is taken.
   */
  #emit(token: Token): { token: Token; span: Span } {
    return { token, span: this.span() };
  }

  /**
   * Parses a string literal (after the opening quote).
   *
   * Decodes `\n`, `\t`, `\r`, `\\` and `\"`; any other escape is kept
   * verbatim, backslash included. Nothing is consumed when the closing quote
   * is missing.
   */
  #parseStringLiteral(): Result<string> {
    const src = this.remainder();
    let escaped = false;
    let content = "";

    for (let i = 0; i < src.length; i++) {
      const ch = src[i];
      if (ch === undefined) break;

      if (escaped) {
        switch (ch) {
          case "n":
            content += "\n";
            break;
          case "t":
            content += "\t";
            break;
          case "r":
            content += "\r";
            break;
          case "\\":
            content += "\\";
            break;
          case '"':
            content += '"';
            break;
          default:
            // Unknown escape sequence: be lenient and keep it as written.
            content += "\\";
            content += ch;
        }
        escaped = false;
        continue;
      }

      if (ch === '"') {
        this.bump(i + 1);
        return ok(content);
      }
      if (ch === "\\") {
        escaped = true;
        continue;
      }
      content += ch;
    }

    return err(unexpectedEndOfInput());
  }

  /**
   * Scans a regex body up to its closing, unescaped `/` and validates it.
   *
   * @param allowClosingQuote - when true, a `'` directly after the closing
   *   `/` is consumed as well.
   * @returns the regex source without delimiters, `invalidRegex` when
   *   `RegExp` rejects it, or `unterminatedRegex` (consuming nothing) when no
   *   closing `/` exists.
   */
  #scanRegexBody(allowClosingQuote: boolean): Result<string> {
    const src = this.remainder();
    let escaped = false;

    for (let i = 0; i < src.length; i++) {
      const ch = src[i];
      if (ch === undefined) break;

      if (escaped) {
        // The escaped character can never terminate the regex.
        escaped = false;
        continue;
      }
      if (ch === "\\") {
        escaped = true;
        continue;
      }
      if (ch !== "/") {
        continue;
      }

      // Consume before validating so an error span covers the whole literal.
      this.bump(i + 1);
      if (allowClosingQuote && this.peek() === "'") {
        this.bump(1);
      }
      const body = src.slice(0, i);
      try {
        new RegExp(body);
      } catch {
        return err(invalidRegex(this.span()));
      }
      return ok(body);
    }

    return err(unterminatedRegex(this.span()));
  }

  /**
   * Consumes text up to and including the next `'` and returns the text
   * before it. Reports `unterminatedRegex`, consuming nothing, when there is
   * no `'` left in the source.
   */
  #scanUntilSingleQuote(): Result<string> {
    const src = this.remainder();
    const end = src.indexOf("'");
    if (end === -1) {
      return err(unterminatedRegex(this.span()));
    }
    this.bump(end + 1);
    return ok(src.slice(0, end));
  }

  /**
   * Parses a regex pattern (after the opening slash).
   */
  #parseRegex(): Result<string> {
    return this.#scanRegexBody(false);
  }

  /**
   * Parses a hex pattern (after h').
   *
   * Accepts an even number of hex digits followed by `'`. A non-hex
   * character or a missing closing quote is reported without consuming
   * anything; an odd digit count is reported after consuming the literal.
   */
  #parseHexPattern(): Result<Uint8Array> {
    const src = this.remainder();

    for (let i = 0; i < src.length; i++) {
      const ch = src[i];
      if (ch === undefined) break;

      if (ch === "'") {
        const hex = src.slice(0, i);
        this.bump(i + 1);
        if (hex.length % 2 !== 0) {
          return err(invalidHexString(this.span()));
        }
        // Every character before the quote already passed isHexDigit, so
        // each two-character slice parses to a byte.
        const bytes = new Uint8Array(hex.length / 2);
        for (let j = 0; j < bytes.length; j++) {
          bytes[j] = parseInt(hex.slice(2 * j, 2 * j + 2), 16);
        }
        return ok(bytes);
      }

      if (!isHexDigit(ch)) {
        return err(invalidHexString(this.span()));
      }
    }

    return err(invalidHexString(this.span()));
  }

  /**
   * Parses a hex binary regex (after h'/).
   */
  #parseHexBinaryRegex(): Result<string> {
    return this.#scanRegexBody(true);
  }

  /**
   * Parses a date pattern (after date').
   */
  #parseDatePattern(): Result<string> {
    return this.#scanUntilSingleQuote();
  }

  /**
   * Parses a range pattern (after {).
   *
   * Accepts `n}`, `n,}` and `n,m}` with optional whitespace around the
   * numbers, optionally followed by `?` (lazy) or `+` (possessive). Nothing
   * is consumed when the text does not have that shape.
   */
  #parseRange(): Result<Quantifier> {
    const match = RANGE_BODY.exec(this.remainder());
    const minText = match?.[1];
    if (match === null || minText === undefined) {
      return err(invalidRange(this.span()));
    }

    const min = parseInt(minText, 10);
    const hasComma = match[2] !== undefined;
    const maxText = match[3];

    let max: number | undefined;
    if (maxText !== undefined) {
      max = parseInt(maxText, 10); // {n,m}
    } else if (!hasComma) {
      max = min; // {n}
    } // otherwise {n,}: open ended, max stays undefined

    let mode: Reluctance = Reluctance.Greedy;
    if (match[4] === "?") {
      mode = Reluctance.Lazy;
    } else if (match[4] === "+") {
      mode = Reluctance.Possessive;
    }

    // Consume first so the span of an inverted range covers the whole token.
    this.bump(match[0].length);

    if (max === undefined) {
      return ok(Quantifier.atLeast(min, mode));
    }
    if (min > max) {
      return err(invalidRange(this.span()));
    }
    return ok(Quantifier.between(min, max, mode));
  }

  /**
   * Parses a single quoted pattern (after ').
   */
  #parseSingleQuotedPattern(): Result<string> {
    return this.#scanUntilSingleQuote();
  }

  /**
   * Parses a single quoted regex (after '/).
   */
  #parseSingleQuotedRegex(): Result<string> {
    return this.#scanRegexBody(true);
  }

  /**
   * Parses a number (integer or float) starting at the current position.
   *
   * A fraction is consumed only when `.` is followed by a digit, and an
   * exponent only when `e`/`E` (plus an optional sign) is followed by a
   * digit; otherwise those characters are left for the next token. Numbers
   * with a fraction or exponent become `Float`; others become `Integer` when
   * written with a leading `-` and `UnsignedInteger` when not.
   */
  #parseNumber(): Token {
    const start = this.#position;

    const isNegative = this.peek() === "-";
    if (isNegative) {
      this.bump(1);
    }
    this.#skipWhile(isDigit);

    let isFloat = false;

    // Fractional part
    const afterDot = this.peekNext();
    if (this.peek() === "." && afterDot !== undefined && isDigit(afterDot)) {
      isFloat = true;
      this.bump(1); // consume '.'
      this.#skipWhile(isDigit);
    }

    // Exponent
    const marker = this.peek();
    if (marker === "e" || marker === "E") {
      const sign = this.peekNext();
      const signLength = sign === "+" || sign === "-" ? 1 : 0;
      const firstDigit = this.#source[this.#position + 1 + signLength];
      if (firstDigit !== undefined && isDigit(firstDigit)) {
        isFloat = true;
        this.bump(1 + signLength);
        this.#skipWhile(isDigit);
      }
    }

    const text = this.#source.slice(start, this.#position);

    if (isFloat) {
      const value = parseFloat(text);
      if (Number.isNaN(value)) {
        return { type: "Float", value: err(invalidNumberFormat(this.span())) };
      }
      return { type: "Float", value: ok(value) };
    }

    const value = parseInt(text, 10);
    if (Number.isNaN(value)) {
      return { type: "Integer", value: err(invalidNumberFormat(this.span())) };
    }
    if (isNegative) {
      return { type: "Integer", value: ok(value) };
    }
    return { type: "UnsignedInteger", value: ok(value) };
  }

  /**
   * Lexes `@name` (the current character is `@`).
   *
   * The token's name is the empty string when no identifier follows the `@`.
   */
  #lexGroupName(): { token: Token; span: Span } {
    this.bump(1);
    const start = this.#position;
    const first = this.peek();
    if (first === undefined || !isIdentStart(first)) {
      return this.#emit({ type: "GroupName", name: "" });
    }
    this.#skipWhile(isIdentContinue);
    return this.#emit({ type: "GroupName", name: this.#source.slice(start, this.#position) });
  }

  /**
   * Gets the next token from the input.
   *
   * Returns the cached result of a preceding `peekToken()` when there is
   * one. Returns `undefined` at the end of the input, and also after
   * skipping an identifier that is not a keyword or a character that starts
   * no token.
   */
  next(): { token: Token; span: Span } | undefined {
    if (this.#hasPeeked) {
      const peeked = this.#peekedToken;
      this.#peekedToken = undefined;
      this.#hasPeeked = false;
      return peeked;
    }

    this.#skipWhile(isWhitespace);
    this.#tokenStart = this.#position;

    const ch = this.peek();
    if (ch === undefined) {
      return undefined;
    }

    // Longest fixed lexemes first, so that `...` and `-Infinity` are not
    // split into shorter tokens.
    if (this.#source.startsWith("...", this.#position)) {
      this.bump(3);
      return this.#emit({ type: "Ellipsis" });
    }
    if (this.#source.startsWith("-Infinity", this.#position)) {
      this.bump(9);
      return this.#emit({ type: "NegativeInfinity" });
    }

    // Two-character lexemes
    const twoChar = this.#source.slice(this.#position, this.#position + 2);
    if (twoChar === "h'") {
      this.bump(2);
      if (this.peek() === "/") {
        this.bump(1);
        return this.#emit({ type: "HexBinaryRegex", value: this.#parseHexBinaryRegex() });
      }
      return this.#emit({ type: "HexPattern", value: this.#parseHexPattern() });
    }
    if (twoChar === "'/") {
      this.bump(2);
      return this.#emit({ type: "SingleQuotedRegex", value: this.#parseSingleQuotedRegex() });
    }
    const twoCharToken = TWO_CHAR_TOKENS.get(twoChar);
    if (twoCharToken !== undefined) {
      this.bump(2);
      return this.#emit(twoCharToken);
    }

    // Single-character lexemes
    const oneCharToken = ONE_CHAR_TOKENS.get(ch);
    if (oneCharToken !== undefined) {
      this.bump(1);
      return this.#emit(oneCharToken);
    }
    switch (ch) {
      case '"':
        this.bump(1);
        return this.#emit({ type: "StringLiteral", value: this.#parseStringLiteral() });
      case "/":
        this.bump(1);
        return this.#emit({ type: "Regex", value: this.#parseRegex() });
      case "{":
        this.bump(1);
        return this.#emit({ type: "Range", value: this.#parseRange() });
      case "'":
        this.bump(1);
        return this.#emit({
          type: "SingleQuotedPattern",
          value: this.#parseSingleQuotedPattern(),
        });
      case "@":
        return this.#lexGroupName();
    }

    // `date'...'` must be tried before the identifier rule, which would
    // otherwise stop at the quote and yield the `date` keyword.
    if (this.#source.startsWith("date'", this.#position)) {
      this.bump(5);
      return this.#emit({ type: "DatePattern", value: this.#parseDatePattern() });
    }

    // Number; a leading `-` counts only when a digit follows it
    const lookahead = this.peekNext();
    if (isDigit(ch) || (ch === "-" && lookahead !== undefined && isDigit(lookahead))) {
      return this.#emit(this.#parseNumber());
    }

    // Identifier: only keywords are tokens
    if (isIdentStart(ch)) {
      const start = this.#position;
      this.#skipWhile(isIdentContinue);
      const keyword = KEYWORDS.get(this.#source.slice(start, this.#position));
      // An unknown identifier is consumed but produces no token.
      return keyword === undefined ? undefined : this.#emit(keyword);
    }

    // Unknown character: consume it and produce no token.
    this.bump(1);
    return undefined;
  }

  /**
   * Iterates over tokens by calling `next()` until it returns `undefined`.
   *
   * Because `next()` also returns `undefined` for unrecognised input,
   * iteration ends at the first such input as well as at the end of the
   * source.
   */
  *[Symbol.iterator](): Iterator<
    { token: Token; span: Span } | { error: EnvelopePatternError; span: Span }
  > {
    for (let item = this.next(); item !== undefined; item = this.next()) {
      yield item;
    }
  }
}
|
|
900
|
+
|
|
901
|
+
/**
 * Convenience factory: builds a `Lexer` positioned at the start of `source`.
 */
export function lexer(source: string): Lexer {
  const instance = new Lexer(source);
  return instance;
}
|