@bcts/dcbor-pattern 1.0.0-alpha.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +48 -0
- package/README.md +14 -0
- package/dist/index.cjs +6561 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +2732 -0
- package/dist/index.d.cts.map +1 -0
- package/dist/index.d.mts +2732 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.iife.js +6562 -0
- package/dist/index.iife.js.map +1 -0
- package/dist/index.mjs +6244 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +85 -0
- package/src/error.ts +333 -0
- package/src/format.ts +299 -0
- package/src/index.ts +20 -0
- package/src/interval.ts +230 -0
- package/src/parse/index.ts +95 -0
- package/src/parse/meta/and-parser.ts +47 -0
- package/src/parse/meta/capture-parser.ts +56 -0
- package/src/parse/meta/index.ts +13 -0
- package/src/parse/meta/not-parser.ts +28 -0
- package/src/parse/meta/or-parser.ts +47 -0
- package/src/parse/meta/primary-parser.ts +420 -0
- package/src/parse/meta/repeat-parser.ts +133 -0
- package/src/parse/meta/search-parser.ts +56 -0
- package/src/parse/parse-registry.ts +31 -0
- package/src/parse/structure/array-parser.ts +210 -0
- package/src/parse/structure/index.ts +9 -0
- package/src/parse/structure/map-parser.ts +128 -0
- package/src/parse/structure/tagged-parser.ts +269 -0
- package/src/parse/token.ts +997 -0
- package/src/parse/value/bool-parser.ts +33 -0
- package/src/parse/value/bytestring-parser.ts +42 -0
- package/src/parse/value/date-parser.ts +24 -0
- package/src/parse/value/digest-parser.ts +24 -0
- package/src/parse/value/index.ts +14 -0
- package/src/parse/value/known-value-parser.ts +24 -0
- package/src/parse/value/null-parser.ts +19 -0
- package/src/parse/value/number-parser.ts +19 -0
- package/src/parse/value/text-parser.ts +43 -0
- package/src/pattern/index.ts +740 -0
- package/src/pattern/match-registry.ts +137 -0
- package/src/pattern/matcher.ts +388 -0
- package/src/pattern/meta/and-pattern.ts +56 -0
- package/src/pattern/meta/any-pattern.ts +43 -0
- package/src/pattern/meta/capture-pattern.ts +57 -0
- package/src/pattern/meta/index.ts +168 -0
- package/src/pattern/meta/not-pattern.ts +70 -0
- package/src/pattern/meta/or-pattern.ts +56 -0
- package/src/pattern/meta/repeat-pattern.ts +117 -0
- package/src/pattern/meta/search-pattern.ts +298 -0
- package/src/pattern/meta/sequence-pattern.ts +72 -0
- package/src/pattern/structure/array-pattern/assigner.ts +95 -0
- package/src/pattern/structure/array-pattern/backtrack.ts +240 -0
- package/src/pattern/structure/array-pattern/helpers.ts +140 -0
- package/src/pattern/structure/array-pattern/index.ts +502 -0
- package/src/pattern/structure/index.ts +122 -0
- package/src/pattern/structure/map-pattern.ts +255 -0
- package/src/pattern/structure/tagged-pattern.ts +190 -0
- package/src/pattern/value/bool-pattern.ts +67 -0
- package/src/pattern/value/bytes-utils.ts +48 -0
- package/src/pattern/value/bytestring-pattern.ts +111 -0
- package/src/pattern/value/date-pattern.ts +162 -0
- package/src/pattern/value/digest-pattern.ts +136 -0
- package/src/pattern/value/index.ts +168 -0
- package/src/pattern/value/known-value-pattern.ts +123 -0
- package/src/pattern/value/null-pattern.ts +46 -0
- package/src/pattern/value/number-pattern.ts +181 -0
- package/src/pattern/value/text-pattern.ts +82 -0
- package/src/pattern/vm.ts +619 -0
- package/src/quantifier.ts +185 -0
- package/src/reluctance.ts +65 -0
|
@@ -0,0 +1,997 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token types and Lexer for the dCBOR pattern language.
|
|
3
|
+
*
|
|
4
|
+
* This module provides tokenization for dCBOR pattern expressions,
|
|
5
|
+
* converting input strings into a sequence of tokens for parsing.
|
|
6
|
+
*
|
|
7
|
+
* @module parse/token
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { type Span, span, type Result, Ok, Err } from "../error";
|
|
11
|
+
import { Quantifier } from "../quantifier";
|
|
12
|
+
import { Reluctance } from "../reluctance";
|
|
13
|
+
|
|
14
|
+
/**
 * Token types for dCBOR pattern parsing.
 *
 * This is a discriminated union matching the Rust Token enum. Every variant
 * carries a string `type` tag; only the "complex literal" variants at the
 * bottom carry an additional payload field.
 */
export type Token =
  // Operators
  | { readonly type: "And" } // `&`
  | { readonly type: "Or" } // `|`
  | { readonly type: "Not" } // `!`

  // Quantifiers
  | { readonly type: "RepeatZeroOrMore" } // `*`
  | { readonly type: "RepeatZeroOrMoreLazy" } // `*?`
  | { readonly type: "RepeatZeroOrMorePossessive" } // `*+`
  | { readonly type: "RepeatOneOrMore" } // `+`
  | { readonly type: "RepeatOneOrMoreLazy" } // `+?`
  | { readonly type: "RepeatOneOrMorePossessive" } // `++`
  | { readonly type: "RepeatZeroOrOne" } // `?`
  | { readonly type: "RepeatZeroOrOneLazy" } // `??`
  | { readonly type: "RepeatZeroOrOnePossessive" } // `?+`

  // Structure keywords
  | { readonly type: "Tagged" } // `tagged`
  | { readonly type: "Array" } // `array`
  | { readonly type: "Map" } // `map`

  // Value keywords
  | { readonly type: "Bool" } // `bool`
  | { readonly type: "ByteString" } // `bstr`
  | { readonly type: "Date" } // `date`
  | { readonly type: "Known" } // `known`
  | { readonly type: "Null" } // `null`
  | { readonly type: "Number" } // `number`
  | { readonly type: "Text" } // `text`
  | { readonly type: "Digest" } // `digest`
  | { readonly type: "Search" } // `search`

  // Literals
  | { readonly type: "BoolTrue" } // `true`
  | { readonly type: "BoolFalse" } // `false`
  | { readonly type: "NaN" } // `NaN`
  | { readonly type: "Infinity" } // `Infinity`
  | { readonly type: "NegInfinity" } // `-Infinity`

  // Delimiters
  | { readonly type: "ParenOpen" } // `(`
  | { readonly type: "ParenClose" } // `)`
  | { readonly type: "BracketOpen" } // `[`
  | { readonly type: "BracketClose" } // `]`
  | { readonly type: "BraceOpen" } // `{` when it does not start a range quantifier
  | { readonly type: "BraceClose" } // `}`
  | { readonly type: "Comma" } // `,`
  | { readonly type: "Colon" } // `:`
  | { readonly type: "Ellipsis" } // `...` (the lexer also emits this for a two-dot `..`)

  // Comparisons
  | { readonly type: "GreaterThanOrEqual" } // `>=`
  | { readonly type: "LessThanOrEqual" } // `<=`
  | { readonly type: "GreaterThan" } // `>`
  | { readonly type: "LessThan" } // `<`

  // Complex literals
  | { readonly type: "NumberLiteral"; readonly value: number } // finite decimal number, already parsed
  | { readonly type: "GroupName"; readonly name: string } // `@name`; `name` excludes the `@`
  | { readonly type: "StringLiteral"; readonly value: string } // "..." with recognised escapes decoded
  | { readonly type: "SingleQuoted"; readonly value: string } // '...' with recognised escapes decoded
  | { readonly type: "Regex"; readonly pattern: string } // `/.../`; raw text between the slashes
  | { readonly type: "HexString"; readonly value: Uint8Array } // `h'...'` decoded to bytes
  | { readonly type: "HexRegex"; readonly pattern: string } // `h'/.../'`; raw text between the slashes
  // NOTE(review): the code producing the next two variants is outside this
  // section; presumably they come from date'...' and digest'...' forms — confirm.
  | { readonly type: "DateQuoted"; readonly value: string }
  | { readonly type: "DigestQuoted"; readonly value: string }
  | { readonly type: "Range"; readonly quantifier: Quantifier }; // `{n}`, `{n,}`, `{n,m}` plus optional `?` / `+`
|
|
87
|
+
|
|
88
|
+
/**
 * A token with its position in the source.
 *
 * The lexer builds `span` from the offset at which the token started to the
 * lexer position reached once the token has been consumed.
 */
export interface SpannedToken {
  // The lexed token.
  readonly token: Token;
  // Start/end offsets of the token within the lexer input.
  readonly span: Span;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Simple keywords that map directly to tokens.
 *
 * The table is built on a prototype-less object so that indexing it with an
 * arbitrary identifier (for example `constructor`, `toString` or `__proto__`)
 * yields `undefined` instead of a member inherited from `Object.prototype`.
 * Lookups of real keywords behave exactly as with a plain object literal.
 */
const KEYWORDS: Record<string, Token> = Object.assign<Record<string, Token>, Record<string, Token>>(
  Object.create(null),
  {
    // Structure keywords
    tagged: { type: "Tagged" },
    array: { type: "Array" },
    map: { type: "Map" },

    // Value keywords
    bool: { type: "Bool" },
    bstr: { type: "ByteString" },
    date: { type: "Date" },
    known: { type: "Known" },
    null: { type: "Null" },
    number: { type: "Number" },
    text: { type: "Text" },
    digest: { type: "Digest" },
    search: { type: "Search" },

    // Boolean literals
    true: { type: "BoolTrue" },
    false: { type: "BoolFalse" },

    // Special values
    NaN: { type: "NaN" },
    Infinity: { type: "Infinity" },
  },
);
|
|
124
|
+
|
|
125
|
+
/**
 * Reports whether `ch` is one of the characters the lexer skips between
 * tokens: space, tab, carriage return, line feed or form feed.
 */
const isWhitespace = (ch: string): boolean => {
  switch (ch) {
    case " ":
    case "\t":
    case "\r":
    case "\n":
    case "\f":
      return true;
    default:
      return false;
  }
};
|
|
131
|
+
|
|
132
|
+
/**
 * Reports whether `ch` lies in the ASCII range "0" through "9".
 */
const isDigit = (ch: string): boolean => {
  if (ch >= "0") {
    return ch <= "9";
  }
  return false;
};
|
|
138
|
+
|
|
139
|
+
/**
 * Reports whether `ch` is a hexadecimal digit: 0-9, a-f or A-F.
 */
const isHexDigit = (ch: string): boolean => {
  if (ch >= "0" && ch <= "9") {
    return true;
  }
  if (ch >= "a" && ch <= "f") {
    return true;
  }
  return ch >= "A" && ch <= "F";
};
|
|
145
|
+
|
|
146
|
+
/**
 * Reports whether `ch` may begin an identifier: an ASCII letter or "_".
 */
const isIdentStart = (ch: string): boolean => {
  if (ch === "_") {
    return true;
  }
  const lower = ch >= "a" && ch <= "z";
  const upper = ch >= "A" && ch <= "Z";
  return lower || upper;
};
|
|
152
|
+
|
|
153
|
+
/**
 * Reports whether `ch` may appear after the first character of an
 * identifier: a decimal digit or anything accepted as an identifier start.
 */
const isIdentCont = (ch: string): boolean => {
  if (isDigit(ch)) {
    return true;
  }
  return isIdentStart(ch);
};
|
|
159
|
+
|
|
160
|
+
/**
 * Parse a hex string to bytes.
 *
 * @param hex - Text made solely of hexadecimal digits (either case).
 * @returns The decoded bytes (an empty array for an empty string), or
 *   `undefined` when `hex` has odd length or contains any character that is
 *   not a hexadecimal digit.
 */
const hexToBytes = (hex: string): Uint8Array | undefined => {
  // Validate the whole string up front. `parseInt` alone is too lenient: it
  // parses a valid prefix and ignores the rest ("1g" -> 1) and accepts signs
  // and leading whitespace ("-1", " 1"), which would decode to wrong bytes.
  if (hex.length % 2 !== 0 || !/^[0-9a-fA-F]*$/.test(hex)) {
    return undefined;
  }

  const bytes = new Uint8Array(hex.length / 2);
  for (let index = 0; index < bytes.length; index++) {
    const offset = index * 2;
    bytes[index] = parseInt(hex.slice(offset, offset + 2), 16);
  }
  return bytes;
};
|
|
177
|
+
|
|
178
|
+
/**
|
|
179
|
+
* Lexer state for tokenizing dCBOR pattern expressions.
|
|
180
|
+
*/
|
|
181
|
+
export class Lexer {
|
|
182
|
+
readonly #input: string;
|
|
183
|
+
#position: number;
|
|
184
|
+
|
|
185
|
+
constructor(input: string) {
  // Scanning starts at offset 0 of the stored pattern text.
  this.#position = 0;
  this.#input = input;
}
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Creates a new lexer for the given input.
|
|
192
|
+
*/
|
|
193
|
+
static new(input: string): Lexer {
  // Factory wrapper around the constructor.
  const lexer = new Lexer(input);
  return lexer;
}
|
|
196
|
+
|
|
197
|
+
/**
|
|
198
|
+
* Returns the input string.
|
|
199
|
+
*/
|
|
200
|
+
input(): string {
  // The full text handed to the constructor, regardless of progress.
  const source = this.#input;
  return source;
}
|
|
203
|
+
|
|
204
|
+
/**
|
|
205
|
+
* Returns the current position in the input.
|
|
206
|
+
*/
|
|
207
|
+
position(): number {
  // Offset of the next character to be read.
  const offset = this.#position;
  return offset;
}
|
|
210
|
+
|
|
211
|
+
/**
|
|
212
|
+
* Returns the remaining input.
|
|
213
|
+
*/
|
|
214
|
+
remainder(): string {
  // Everything from the current offset to the end of the input.
  const from = this.#position;
  return this.#input.slice(from);
}
|
|
217
|
+
|
|
218
|
+
/**
|
|
219
|
+
* Peeks at the current character without consuming it.
|
|
220
|
+
*/
|
|
221
|
+
peek(): string | undefined {
  // Indexing past the end of a string yields undefined, which signals
  // end of input to callers.
  const index = this.#position;
  return this.#input[index];
}
|
|
224
|
+
|
|
225
|
+
/**
|
|
226
|
+
* Peeks at the character at offset from current position.
|
|
227
|
+
*/
|
|
228
|
+
peekAt(offset: number): string | undefined {
  // Look ahead without moving; undefined when the target index is out of range.
  const index = this.#position + offset;
  return this.#input[index];
}
|
|
231
|
+
|
|
232
|
+
/**
|
|
233
|
+
* Consumes and returns the current character.
|
|
234
|
+
*/
|
|
235
|
+
advance(): string | undefined {
  const current = this.#input[this.#position];
  if (current === undefined) {
    // Nothing left to consume: leave the position untouched.
    return undefined;
  }
  this.#position += 1;
  return current;
}
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* Advances by n characters.
|
|
245
|
+
*/
|
|
246
|
+
bump(n: number): void {
  // Unchecked move: the caller is responsible for keeping the position in range.
  this.#position = this.#position + n;
}
|
|
249
|
+
|
|
250
|
+
/**
|
|
251
|
+
* Creates a span from start to current position.
|
|
252
|
+
*/
|
|
253
|
+
spanFrom(start: number): Span {
  // The span ends at wherever the lexer currently stands.
  const end = this.#position;
  return span(start, end);
}
|
|
256
|
+
|
|
257
|
+
/**
|
|
258
|
+
* Skips whitespace characters.
|
|
259
|
+
*/
|
|
260
|
+
skipWhitespace(): void {
  const text = this.#input;
  let cursor = this.#position;
  // Walk forward over the run of whitespace, then commit the new offset once.
  while (cursor < text.length && isWhitespace(text[cursor])) {
    cursor += 1;
  }
  this.#position = cursor;
}
|
|
265
|
+
|
|
266
|
+
/**
|
|
267
|
+
* Checks if the remainder starts with the given string.
|
|
268
|
+
*/
|
|
269
|
+
startsWith(s: string): boolean {
  // Compare in place at the current offset instead of slicing the remainder
  // first. `next()` probes many fixed operators per token, so copying the
  // rest of the input on every probe made tokenization quadratic in the
  // input length. The result is the same: true only when the text at the
  // current position begins with `s`.
  return this.#input.startsWith(s, this.#position);
}
|
|
272
|
+
|
|
273
|
+
/**
|
|
274
|
+
* Gets the next token.
|
|
275
|
+
*/
|
|
276
|
+
next(): Result<SpannedToken> | undefined {
  this.skipWhitespace();

  const start = this.#position;
  if (start >= this.#input.length) {
    // Only whitespace (or nothing) was left: signal end of input.
    return undefined;
  }

  const first = this.peek() ?? "";

  // Wraps a token with the span running from `start` to the current position.
  const finish = (token: Token): Result<SpannedToken> =>
    Ok({ token, span: this.spanFrom(start) });

  // Consumes the fixed text `text` if the input continues with it.
  const fixed = (text: string, token: Token): Result<SpannedToken> | undefined => {
    if (!this.startsWith(text)) {
      return undefined;
    }
    this.bump(text.length);
    return finish(token);
  };

  // Consumes exactly one character and emits `token`.
  const single = (token: Token): Result<SpannedToken> => {
    this.advance();
    return finish(token);
  };

  // Multi-character operators come first. Order matters: "..." must be
  // probed before "..", and every two-character quantifier before the
  // single-character switch below sees its first character.
  const multi =
    fixed("-Infinity", { type: "NegInfinity" }) ??
    fixed("...", { type: "Ellipsis" }) ??
    fixed("..", { type: "Ellipsis" }) ?? // two-dot form used in ranges
    fixed(">=", { type: "GreaterThanOrEqual" }) ??
    fixed("<=", { type: "LessThanOrEqual" }) ??
    fixed("*?", { type: "RepeatZeroOrMoreLazy" }) ??
    fixed("*+", { type: "RepeatZeroOrMorePossessive" }) ??
    fixed("+?", { type: "RepeatOneOrMoreLazy" }) ??
    fixed("++", { type: "RepeatOneOrMorePossessive" }) ??
    fixed("??", { type: "RepeatZeroOrOneLazy" }) ??
    fixed("?+", { type: "RepeatZeroOrOnePossessive" });
  if (multi !== undefined) {
    return multi;
  }

  switch (first) {
    // Single-character operators and delimiters.
    case "&":
      return single({ type: "And" });
    case "|":
      return single({ type: "Or" });
    case "!":
      return single({ type: "Not" });
    case "*":
      return single({ type: "RepeatZeroOrMore" });
    case "+":
      return single({ type: "RepeatOneOrMore" });
    case "?":
      return single({ type: "RepeatZeroOrOne" });
    case "(":
      return single({ type: "ParenOpen" });
    case ")":
      return single({ type: "ParenClose" });
    case "[":
      return single({ type: "BracketOpen" });
    case "]":
      return single({ type: "BracketClose" });
    case "}":
      return single({ type: "BraceClose" });
    case ",":
      return single({ type: "Comma" });
    case ":":
      return single({ type: "Colon" });
    case ">":
      return single({ type: "GreaterThan" });
    case "<":
      return single({ type: "LessThan" });

    // Openers whose remainder is handled by a dedicated sub-lexer; the
    // opening character is consumed here first.
    case "{":
      // Either a plain brace or the start of a range quantifier.
      this.advance();
      return this.parseBraceOpen(start);
    case '"':
      this.advance();
      return this.parseString(start);
    case "'":
      this.advance();
      return this.parseSingleQuoted(start);
    case "/":
      this.advance();
      return this.parseRegex(start);
    case "@":
      this.advance();
      return this.parseGroupName(start);
  }

  // Hex string h'...' or hex regex h'/.../'.
  if (first === "h" && this.peekAt(1) === "'") {
    this.bump(2);
    if (this.peek() !== "/") {
      return this.parseHexString(start);
    }
    this.advance();
    return this.parseHexRegex(start);
  }

  // Number literal; a leading "-" counts only when a digit follows it.
  const startsNumber = isDigit(first) || (first === "-" && isDigit(this.peekAt(1) ?? ""));
  if (startsNumber) {
    return this.parseNumber(start);
  }

  // Identifier or keyword (including date' and digest').
  if (isIdentStart(first)) {
    return this.parseIdentifierOrKeyword(start);
  }

  // Nothing matched: consume the offending character so the error span covers it.
  this.advance();
  return Err({ type: "UnrecognizedToken", span: this.spanFrom(start) });
}
|
|
447
|
+
|
|
448
|
+
/**
|
|
449
|
+
* Tokenizes the entire input and returns all tokens.
|
|
450
|
+
*/
|
|
451
|
+
tokenize(): Result<SpannedToken[]> {
  const collected: SpannedToken[] = [];

  // Pull tokens until `next()` reports end of input; the first lexing error
  // aborts the run and is returned unchanged.
  for (let item = this.next(); item !== undefined; item = this.next()) {
    if (!item.ok) {
      return item as Result<SpannedToken[]>;
    }
    collected.push(item.value);
  }

  return Ok(collected);
}
|
|
467
|
+
|
|
468
|
+
/**
|
|
469
|
+
* Parse { - could be BraceOpen or Range.
|
|
470
|
+
*/
|
|
471
|
+
private parseBraceOpen(start: number): Result<SpannedToken> {
  // The "{" has already been consumed; inspect what follows without moving.
  const rest = this.remainder();

  let idx = 0;
  while (idx < rest.length && isWhitespace(rest[idx])) {
    idx += 1;
  }

  // A range quantifier needs a digit right after optional whitespace.
  const digitAhead = idx < rest.length && isDigit(rest[idx]);
  if (digitAhead && this.looksLikeRangePattern(rest.slice(idx))) {
    return this.parseRange(start);
  }

  return Ok({ token: { type: "BraceOpen" }, span: this.spanFrom(start) });
}
|
|
490
|
+
|
|
491
|
+
/**
|
|
492
|
+
* Check if content looks like a range pattern.
|
|
493
|
+
*/
|
|
494
|
+
private looksLikeRangePattern(content: string): boolean {
  const length = content.length;

  // Returns the first index at or after `from` whose character is rejected
  // by `accept` (or `length` when the run reaches the end).
  const skipWhile = (from: number, accept: (ch: string) => boolean): number => {
    let at = from;
    while (at < length && accept(content[at])) {
      at += 1;
    }
    return at;
  };

  const digitsStart = skipWhile(0, isWhitespace);
  const digitsEnd = skipWhile(digitsStart, isDigit);
  if (digitsEnd === digitsStart) {
    // At least one digit is required.
    return false;
  }

  const stop = skipWhile(digitsEnd, isWhitespace);
  if (stop >= length) {
    return false;
  }

  // Only "," or "}" after the digits indicates a range. Anything else,
  // notably ":" (a map constraint), means this is an ordinary brace.
  const follower = content[stop];
  return follower === "," || follower === "}";
}
|
|
529
|
+
|
|
530
|
+
/**
|
|
531
|
+
* Parse a range pattern like {1,5} or {3,} or {5}.
|
|
532
|
+
*/
|
|
533
|
+
private parseRange(start: number): Result<SpannedToken> {
  // Error result whose span runs from the opening brace to the current position.
  const invalid = (): Result<SpannedToken> =>
    Err({ type: "InvalidRange", span: this.spanFrom(start) });

  // Consumes a run of decimal digits; undefined when no digit is present.
  const readInt = (): number | undefined => {
    const from = this.#position;
    for (let c = this.peek(); c !== undefined && isDigit(c); c = this.peek()) {
      this.advance();
    }
    if (this.#position === from) {
      return undefined;
    }
    return parseInt(this.#input.slice(from, this.#position), 10);
  };

  this.skipWhitespace();

  // Lower bound.
  const min = readInt();
  if (min === undefined) {
    return invalid();
  }

  this.skipWhitespace();

  // Upper bound; undefined means "no upper limit".
  let max: number | undefined;
  switch (this.peek()) {
    case "}":
      // Exact: {n}
      this.advance();
      max = min;
      break;
    case ",": {
      this.advance();
      this.skipWhitespace();

      const afterComma = this.peek();
      if (afterComma === "}") {
        // Unbounded: {n,}
        this.advance();
        max = undefined;
      } else if (afterComma !== undefined && isDigit(afterComma)) {
        // Bounded: {n,m}
        max = readInt();

        this.skipWhitespace();
        if (this.peek() !== "}") {
          return invalid();
        }
        this.advance();
      } else {
        return invalid();
      }
      break;
    }
    default:
      return invalid();
  }

  // Optional reluctance modifier directly after the closing brace.
  let reluctance = Reluctance.Greedy;
  const modifier = this.peek();
  if (modifier === "?" || modifier === "+") {
    this.advance();
    reluctance = modifier === "?" ? Reluctance.Lazy : Reluctance.Possessive;
  }

  // A bounded range must not be inverted.
  if (max !== undefined && min > max) {
    return invalid();
  }

  const quantifier =
    max === undefined
      ? Quantifier.atLeast(min, reluctance)
      : Quantifier.between(min, max, reluctance);

  return Ok({ token: { type: "Range", quantifier }, span: this.spanFrom(start) });
}
|
|
614
|
+
|
|
615
|
+
/**
|
|
616
|
+
* Parse a string literal.
|
|
617
|
+
*/
|
|
618
|
+
private parseString(start: number): Result<SpannedToken> {
  // The opening double quote has already been consumed by the caller.
  let text = "";
  let pendingEscape = false;

  while (this.#position < this.#input.length) {
    const ch = this.advance() ?? "";

    if (pendingEscape) {
      pendingEscape = false;
      if (ch === '"' || ch === "\\") {
        text += ch;
      } else if (ch === "n") {
        text += "\n";
      } else if (ch === "r") {
        text += "\r";
      } else if (ch === "t") {
        text += "\t";
      } else {
        // Unknown escape: keep the backslash and the character verbatim.
        text += "\\";
        text += ch;
      }
      continue;
    }

    if (ch === "\\") {
      pendingEscape = true;
      continue;
    }

    if (ch === '"') {
      return Ok({ token: { type: "StringLiteral", value: text }, span: this.spanFrom(start) });
    }

    text += ch;
  }

  // Ran out of input before the closing quote.
  return Err({ type: "UnterminatedString", span: this.spanFrom(start) });
}
|
|
659
|
+
|
|
660
|
+
/**
|
|
661
|
+
* Parse a single-quoted string.
|
|
662
|
+
*/
|
|
663
|
+
private parseSingleQuoted(start: number): Result<SpannedToken> {
  // The opening single quote has already been consumed by the caller.
  let text = "";
  let pendingEscape = false;

  while (this.#position < this.#input.length) {
    const ch = this.advance() ?? "";

    if (pendingEscape) {
      pendingEscape = false;
      if (ch === "'" || ch === "\\") {
        text += ch;
      } else if (ch === "n") {
        text += "\n";
      } else if (ch === "r") {
        text += "\r";
      } else if (ch === "t") {
        text += "\t";
      } else {
        // Unknown escape: keep the backslash and the character verbatim.
        text += "\\";
        text += ch;
      }
      continue;
    }

    if (ch === "\\") {
      pendingEscape = true;
      continue;
    }

    if (ch === "'") {
      return Ok({ token: { type: "SingleQuoted", value: text }, span: this.spanFrom(start) });
    }

    text += ch;
  }

  // Ran out of input before the closing quote.
  return Err({ type: "UnterminatedString", span: this.spanFrom(start) });
}
|
|
704
|
+
|
|
705
|
+
/**
|
|
706
|
+
* Parse a regex pattern.
|
|
707
|
+
*/
|
|
708
|
+
private parseRegex(start: number): Result<SpannedToken> {
  // The opening "/" has already been consumed by the caller.
  let source = "";
  let afterBackslash = false;

  while (this.#position < this.#input.length) {
    const ch = this.advance() ?? "";

    // An unescaped "/" terminates the pattern.
    if (!afterBackslash && ch === "/") {
      try {
        // Constructed only to check that the pattern compiles.
        new RegExp(source);
      } catch {
        return Err({ type: "InvalidRegex", span: this.spanFrom(start) });
      }
      return Ok({ token: { type: "Regex", pattern: source }, span: this.spanFrom(start) });
    }

    // Backslashes stay in the pattern text; the flag only prevents the next
    // character from being treated as the terminator.
    source += ch;
    afterBackslash = !afterBackslash && ch === "\\";
  }

  return Err({ type: "UnterminatedRegex", span: this.spanFrom(start) });
}
|
|
736
|
+
|
|
737
|
+
/**
|
|
738
|
+
* Parse a group name.
|
|
739
|
+
*/
|
|
740
|
+
private parseGroupName(start: number): Result<SpannedToken> {
  // The "@" has already been consumed; the name begins at the current position.
  const nameStart = this.#position;

  // The name must open with an identifier-start character.
  if (!isIdentStart(this.peek() ?? "")) {
    return Err({ type: "InvalidCaptureGroupName", name: "", span: this.spanFrom(start) });
  }

  // Consume the rest of the identifier.
  for (let c = this.peek(); c !== undefined && isIdentCont(c); c = this.peek()) {
    this.advance();
  }

  const name = this.#input.slice(nameStart, this.#position);
  return Ok({ token: { type: "GroupName", name }, span: this.spanFrom(start) });
}
|
|
757
|
+
|
|
758
|
+
/**
|
|
759
|
+
* Parse a hex string.
|
|
760
|
+
*/
|
|
761
|
+
private parseHexString(start: number): Result<SpannedToken> {
  // The leading h' has already been consumed by the caller.
  const invalid = (): Result<SpannedToken> =>
    Err({ type: "InvalidHexString", span: this.spanFrom(start) });

  let digits = "";

  while (this.#position < this.#input.length) {
    const ch = this.peek() ?? "";
    const closing = ch === "'";

    if (!closing && !isHexDigit(ch)) {
      // Stray character: report it without consuming it.
      return invalid();
    }

    this.advance();
    if (!closing) {
      digits += ch;
      continue;
    }

    // Closing quote reached: decode the collected digits.
    const bytes = hexToBytes(digits);
    if (bytes === undefined) {
      return invalid();
    }
    return Ok({ token: { type: "HexString", value: bytes }, span: this.spanFrom(start) });
  }

  return Err({ type: "UnterminatedHexString", span: this.spanFrom(start) });
}
|
|
784
|
+
|
|
785
|
+
/**
|
|
786
|
+
* Parse a hex regex pattern.
|
|
787
|
+
*/
|
|
788
|
+
private parseHexRegex(start: number): Result<SpannedToken> {
  // The leading h'/ has already been consumed by the caller.
  let source = "";
  let afterBackslash = false;

  while (this.#position < this.#input.length) {
    const ch = this.advance() ?? "";

    // Only an unescaped "/" immediately followed by "'" ends the pattern;
    // any other "/" is ordinary pattern text.
    if (!afterBackslash && ch === "/" && this.peek() === "'") {
      this.advance();
      try {
        // Constructed only to check that the pattern compiles.
        new RegExp(source);
      } catch {
        return Err({ type: "InvalidRegex", span: this.spanFrom(start) });
      }
      return Ok({ token: { type: "HexRegex", pattern: source }, span: this.spanFrom(start) });
    }

    source += ch;
    afterBackslash = !afterBackslash && ch === "\\";
  }

  return Err({ type: "UnterminatedRegex", span: this.spanFrom(start) });
}
|
|
821
|
+
|
|
822
|
+
/**
|
|
823
|
+
* Parse a number literal.
|
|
824
|
+
*/
|
|
825
|
+
private parseNumber(start: number): Result<SpannedToken> {
  const malformed = (): Result<SpannedToken> =>
    Err({ type: "InvalidNumberFormat", span: this.spanFrom(start) });
  const digitNext = (): boolean => isDigit(this.peek() ?? "");
  const eatDigits = (): void => {
    while (digitNext()) {
      this.advance();
    }
  };

  const numStart = this.#position;

  // Optional sign.
  if (this.peek() === "-") {
    this.advance();
  }

  // Integer part: a lone "0", or a run of digits.
  if (this.peek() === "0") {
    this.advance();
  } else if (digitNext()) {
    eatDigits();
  } else {
    return malformed();
  }

  // Fraction. A ".." after the integer belongs to a range such as 1..10
  // and is left for the next token.
  if (this.peek() === "." && this.peekAt(1) !== ".") {
    this.advance();
    if (!digitNext()) {
      return malformed();
    }
    eatDigits();
  }

  // Exponent with optional sign.
  const marker = this.peek();
  if (marker === "e" || marker === "E") {
    this.advance();
    const sign = this.peek();
    if (sign === "+" || sign === "-") {
      this.advance();
    }
    if (!digitNext()) {
      return malformed();
    }
    eatDigits();
  }

  const value = parseFloat(this.#input.slice(numStart, this.#position));

  // Literals that overflow to an infinity are rejected.
  if (!isFinite(value)) {
    return malformed();
  }

  return Ok({ token: { type: "NumberLiteral", value }, span: this.spanFrom(start) });
}
|
|
878
|
+
|
|
879
|
+
/**
 * Lex an identifier and resolve it to a keyword token.
 *
 * Consumes characters while `isIdentCont` accepts them. The identifiers
 * `date` and `digest`, when immediately followed by a single quote, are
 * handed off to the corresponding quoted-literal parser. Any other
 * identifier must be a key of `KEYWORDS`; otherwise it is rejected.
 *
 * @param start - Offset handed to `spanFrom` for the reported span.
 * @returns The keyword token (or quoted-literal token) on success, or
 *   `UnrecognizedToken` when the identifier is not a known keyword.
 */
private parseIdentifierOrKeyword(start: number): Result<SpannedToken> {
  const identStart = this.#position;

  let identCh = this.peek();
  while (identCh !== undefined && isIdentCont(identCh)) {
    this.advance();
    identCh = this.peek();
  }

  const ident = this.#input.slice(identStart, this.#position);

  // Check for special quoted patterns
  if (ident === "date" && this.peek() === "'") {
    this.advance();
    return this.parseDateQuoted(start);
  }

  if (ident === "digest" && this.peek() === "'") {
    this.advance();
    return this.parseDigestQuoted(start);
  }

  // Check for keyword. Only own properties count: a bare index lookup on a
  // plain object would also resolve inherited members such as `constructor`
  // or `toString`, turning those identifiers into bogus keyword tokens.
  if (Object.prototype.hasOwnProperty.call(KEYWORDS, ident)) {
    const keyword = KEYWORDS[ident];
    if (keyword !== undefined) {
      return Ok({ token: keyword, span: this.spanFrom(start) });
    }
  }

  // Unknown identifier - treat as unrecognized
  return Err({ type: "UnrecognizedToken", span: this.spanFrom(start) });
}
|
|
913
|
+
|
|
914
|
+
/**
 * Lex the body of a `date'...'` literal up to the closing single quote.
 *
 * Called after the opening quote has been consumed. Every character before
 * the closing quote is collected verbatim; no escape handling is applied.
 *
 * @param start - Offset handed to `spanFrom` for the reported span.
 * @returns A `DateQuoted` token with the collected text, `InvalidDateFormat`
 *   when the quotes enclose nothing, or `UnterminatedDateQuoted` when the
 *   input ends before a closing quote.
 */
private parseDateQuoted(start: number): Result<SpannedToken> {
  let body = "";

  for (;;) {
    // Ran off the end of the input without seeing the closing quote.
    if (this.#position >= this.#input.length) {
      return Err({ type: "UnterminatedDateQuoted", span: this.spanFrom(start) });
    }

    const next = this.advance() ?? "";
    if (next !== "'") {
      body += next;
      continue;
    }

    // Closing quote reached: an empty body is not a valid date.
    if (body.length === 0) {
      return Err({ type: "InvalidDateFormat", span: this.spanFrom(start) });
    }
    return Ok({ token: { type: "DateQuoted", value: body }, span: this.spanFrom(start) });
  }
}
|
|
934
|
+
|
|
935
|
+
/**
 * Lex the body of a `digest'...'` literal up to the closing single quote.
 *
 * Called after the opening quote has been consumed. Every character before
 * the closing quote is collected verbatim; no escape handling is applied.
 *
 * @param start - Offset handed to `spanFrom` for the reported span.
 * @returns A `DigestQuoted` token with the collected text,
 *   `InvalidDigestPattern` (message "empty content") when the quotes enclose
 *   nothing, or `UnterminatedDigestQuoted` when the input ends before a
 *   closing quote.
 */
private parseDigestQuoted(start: number): Result<SpannedToken> {
  let body = "";

  for (;;) {
    // Ran off the end of the input without seeing the closing quote.
    if (this.#position >= this.#input.length) {
      return Err({ type: "UnterminatedDigestQuoted", span: this.spanFrom(start) });
    }

    const next = this.advance() ?? "";
    if (next !== "'") {
      body += next;
      continue;
    }

    // Closing quote reached: reject a literal with nothing inside it.
    if (body.length === 0) {
      return Err({
        type: "InvalidDigestPattern",
        message: "empty content",
        span: this.spanFrom(start),
      });
    }
    return Ok({ token: { type: "DigestQuoted", value: body }, span: this.spanFrom(start) });
  }
}
|
|
959
|
+
|
|
960
|
+
/**
 * Look at the upcoming token without consuming it.
 *
 * Runs `next()` and then rewinds the cursor to where it was before the
 * call, so a subsequent `next()` yields the same token again.
 *
 * @returns `undefined` when `next()` yields nothing, the error result
 *   unchanged when lexing fails, or otherwise an `Ok` holding just the
 *   token (its span is dropped).
 */
peekToken(): Result<Token> | undefined {
  const rewindTo = this.#position;
  const lookahead = this.next();
  this.#position = rewindTo;

  // End of input and lexing errors are passed through untouched.
  if (lookahead === undefined || !lookahead.ok) {
    return lookahead;
  }

  return Ok(lookahead.value.token);
}
|
|
979
|
+
|
|
980
|
+
/**
 * Returns an empty span whose start and end are both the current position.
 */
span(): Span {
  const cursor = this.#position;
  return span(cursor, cursor);
}
|
|
986
|
+
|
|
987
|
+
/**
 * Returns a span standing in for the last token's span.
 *
 * The lexer does not record the previous token's span here, so this is a
 * simplification: the result is an empty span at the current position.
 */
lastSpan(): Span {
  const cursor = this.#position;
  return span(cursor, cursor);
}
|
|
994
|
+
}
|
|
995
|
+
|
|
996
|
+
// Re-export Span
|
|
997
|
+
export type { Span };
|