@bcts/dcbor-pattern 1.0.0-alpha.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/LICENSE +48 -0
  2. package/README.md +14 -0
  3. package/dist/index.cjs +6561 -0
  4. package/dist/index.cjs.map +1 -0
  5. package/dist/index.d.cts +2732 -0
  6. package/dist/index.d.cts.map +1 -0
  7. package/dist/index.d.mts +2732 -0
  8. package/dist/index.d.mts.map +1 -0
  9. package/dist/index.iife.js +6562 -0
  10. package/dist/index.iife.js.map +1 -0
  11. package/dist/index.mjs +6244 -0
  12. package/dist/index.mjs.map +1 -0
  13. package/package.json +85 -0
  14. package/src/error.ts +333 -0
  15. package/src/format.ts +299 -0
  16. package/src/index.ts +20 -0
  17. package/src/interval.ts +230 -0
  18. package/src/parse/index.ts +95 -0
  19. package/src/parse/meta/and-parser.ts +47 -0
  20. package/src/parse/meta/capture-parser.ts +56 -0
  21. package/src/parse/meta/index.ts +13 -0
  22. package/src/parse/meta/not-parser.ts +28 -0
  23. package/src/parse/meta/or-parser.ts +47 -0
  24. package/src/parse/meta/primary-parser.ts +420 -0
  25. package/src/parse/meta/repeat-parser.ts +133 -0
  26. package/src/parse/meta/search-parser.ts +56 -0
  27. package/src/parse/parse-registry.ts +31 -0
  28. package/src/parse/structure/array-parser.ts +210 -0
  29. package/src/parse/structure/index.ts +9 -0
  30. package/src/parse/structure/map-parser.ts +128 -0
  31. package/src/parse/structure/tagged-parser.ts +269 -0
  32. package/src/parse/token.ts +997 -0
  33. package/src/parse/value/bool-parser.ts +33 -0
  34. package/src/parse/value/bytestring-parser.ts +42 -0
  35. package/src/parse/value/date-parser.ts +24 -0
  36. package/src/parse/value/digest-parser.ts +24 -0
  37. package/src/parse/value/index.ts +14 -0
  38. package/src/parse/value/known-value-parser.ts +24 -0
  39. package/src/parse/value/null-parser.ts +19 -0
  40. package/src/parse/value/number-parser.ts +19 -0
  41. package/src/parse/value/text-parser.ts +43 -0
  42. package/src/pattern/index.ts +740 -0
  43. package/src/pattern/match-registry.ts +137 -0
  44. package/src/pattern/matcher.ts +388 -0
  45. package/src/pattern/meta/and-pattern.ts +56 -0
  46. package/src/pattern/meta/any-pattern.ts +43 -0
  47. package/src/pattern/meta/capture-pattern.ts +57 -0
  48. package/src/pattern/meta/index.ts +168 -0
  49. package/src/pattern/meta/not-pattern.ts +70 -0
  50. package/src/pattern/meta/or-pattern.ts +56 -0
  51. package/src/pattern/meta/repeat-pattern.ts +117 -0
  52. package/src/pattern/meta/search-pattern.ts +298 -0
  53. package/src/pattern/meta/sequence-pattern.ts +72 -0
  54. package/src/pattern/structure/array-pattern/assigner.ts +95 -0
  55. package/src/pattern/structure/array-pattern/backtrack.ts +240 -0
  56. package/src/pattern/structure/array-pattern/helpers.ts +140 -0
  57. package/src/pattern/structure/array-pattern/index.ts +502 -0
  58. package/src/pattern/structure/index.ts +122 -0
  59. package/src/pattern/structure/map-pattern.ts +255 -0
  60. package/src/pattern/structure/tagged-pattern.ts +190 -0
  61. package/src/pattern/value/bool-pattern.ts +67 -0
  62. package/src/pattern/value/bytes-utils.ts +48 -0
  63. package/src/pattern/value/bytestring-pattern.ts +111 -0
  64. package/src/pattern/value/date-pattern.ts +162 -0
  65. package/src/pattern/value/digest-pattern.ts +136 -0
  66. package/src/pattern/value/index.ts +168 -0
  67. package/src/pattern/value/known-value-pattern.ts +123 -0
  68. package/src/pattern/value/null-pattern.ts +46 -0
  69. package/src/pattern/value/number-pattern.ts +181 -0
  70. package/src/pattern/value/text-pattern.ts +82 -0
  71. package/src/pattern/vm.ts +619 -0
  72. package/src/quantifier.ts +185 -0
  73. package/src/reluctance.ts +65 -0
@@ -0,0 +1,997 @@
1
+ /**
2
+ * Token types and Lexer for the dCBOR pattern language.
3
+ *
4
+ * This module provides tokenization for dCBOR pattern expressions,
5
+ * converting input strings into a sequence of tokens for parsing.
6
+ *
7
+ * @module parse/token
8
+ */
9
+
10
+ import { type Span, span, type Result, Ok, Err } from "../error";
11
+ import { Quantifier } from "../quantifier";
12
+ import { Reluctance } from "../reluctance";
13
+
14
/** Logical operator tokens. */
type OperatorToken =
  | { readonly type: "And" }
  | { readonly type: "Or" }
  | { readonly type: "Not" };

/** Repetition tokens; each base form has a lazy and a possessive variant. */
type QuantifierToken =
  | { readonly type: "RepeatZeroOrMore" }
  | { readonly type: "RepeatZeroOrMoreLazy" }
  | { readonly type: "RepeatZeroOrMorePossessive" }
  | { readonly type: "RepeatOneOrMore" }
  | { readonly type: "RepeatOneOrMoreLazy" }
  | { readonly type: "RepeatOneOrMorePossessive" }
  | { readonly type: "RepeatZeroOrOne" }
  | { readonly type: "RepeatZeroOrOneLazy" }
  | { readonly type: "RepeatZeroOrOnePossessive" };

/** Keywords that introduce structural patterns. */
type StructureKeywordToken =
  | { readonly type: "Tagged" }
  | { readonly type: "Array" }
  | { readonly type: "Map" };

/** Keywords that introduce value patterns (plus `Search`). */
type ValueKeywordToken =
  | { readonly type: "Bool" }
  | { readonly type: "ByteString" }
  | { readonly type: "Date" }
  | { readonly type: "Known" }
  | { readonly type: "Null" }
  | { readonly type: "Number" }
  | { readonly type: "Text" }
  | { readonly type: "Digest" }
  | { readonly type: "Search" };

/** Literal tokens that carry no payload. */
type LiteralKeywordToken =
  | { readonly type: "BoolTrue" }
  | { readonly type: "BoolFalse" }
  | { readonly type: "NaN" }
  | { readonly type: "Infinity" }
  | { readonly type: "NegInfinity" };

/** Punctuation tokens. */
type DelimiterToken =
  | { readonly type: "ParenOpen" }
  | { readonly type: "ParenClose" }
  | { readonly type: "BracketOpen" }
  | { readonly type: "BracketClose" }
  | { readonly type: "BraceOpen" }
  | { readonly type: "BraceClose" }
  | { readonly type: "Comma" }
  | { readonly type: "Colon" }
  | { readonly type: "Ellipsis" };

/** Comparison operator tokens. */
type ComparisonToken =
  | { readonly type: "GreaterThanOrEqual" }
  | { readonly type: "LessThanOrEqual" }
  | { readonly type: "GreaterThan" }
  | { readonly type: "LessThan" };

/** Tokens that carry a payload extracted from the source text. */
type PayloadToken =
  | { readonly type: "NumberLiteral"; readonly value: number }
  | { readonly type: "GroupName"; readonly name: string }
  | { readonly type: "StringLiteral"; readonly value: string }
  | { readonly type: "SingleQuoted"; readonly value: string }
  | { readonly type: "Regex"; readonly pattern: string }
  | { readonly type: "HexString"; readonly value: Uint8Array }
  | { readonly type: "HexRegex"; readonly pattern: string }
  | { readonly type: "DateQuoted"; readonly value: string }
  | { readonly type: "DigestQuoted"; readonly value: string }
  | { readonly type: "Range"; readonly quantifier: Quantifier };

/**
 * Every token the dCBOR pattern lexer can produce.
 *
 * A discriminated union keyed on `type`, mirroring the Rust `Token` enum.
 * The category aliases above exist only to group the variants; the resulting
 * union is the flat set of all of them.
 */
export type Token =
  | OperatorToken
  | QuantifierToken
  | StructureKeywordToken
  | ValueKeywordToken
  | LiteralKeywordToken
  | DelimiterToken
  | ComparisonToken
  | PayloadToken;
87
+
88
/**
 * Pairs a lexed token with the source range it was read from.
 */
export interface SpannedToken {
  /** Range of the input string covered by the token. */
  readonly span: Span;
  /** The token itself. */
  readonly token: Token;
}
95
+
96
/**
 * Reserved words that map directly to a payload-free token.
 *
 * The table is built on a null-prototype object so that indexing it with an
 * arbitrary identifier only ever finds the entries listed here. On a plain
 * object literal, `KEYWORDS["constructor"]` or `KEYWORDS["toString"]` would
 * return members inherited from `Object.prototype`, which are not tokens.
 */
const KEYWORDS: Record<string, Token> = Object.assign<
  Record<string, Token>,
  Record<string, Token>
>(Object.create(null), {
  // Structure keywords
  tagged: { type: "Tagged" },
  array: { type: "Array" },
  map: { type: "Map" },

  // Value keywords
  bool: { type: "Bool" },
  bstr: { type: "ByteString" },
  date: { type: "Date" },
  known: { type: "Known" },
  null: { type: "Null" },
  number: { type: "Number" },
  text: { type: "Text" },
  digest: { type: "Digest" },
  search: { type: "Search" },

  // Boolean literals
  true: { type: "BoolTrue" },
  false: { type: "BoolFalse" },

  // Special values (case-sensitive)
  NaN: { type: "NaN" },
  Infinity: { type: "Infinity" },
});
124
+
125
/**
 * Tells whether `ch` is one of the whitespace characters the lexer skips:
 * space, tab, carriage return, line feed or form feed.
 */
const isWhitespace = (ch: string): boolean => {
  switch (ch) {
    case " ":
    case "\t":
    case "\r":
    case "\n":
    case "\f":
      return true;
    default:
      return false;
  }
};
131
+
132
/**
 * Tells whether `ch` falls in the ASCII range `0`–`9`.
 */
const isDigit = (ch: string): boolean => {
  const notBelowZero = "0" <= ch;
  return notBelowZero && "9" >= ch;
};
138
+
139
/**
 * Tells whether `ch` is an ASCII hexadecimal digit (`0`–`9`, `a`–`f`, `A`–`F`).
 */
const isHexDigit = (ch: string): boolean => {
  if ("0" <= ch && "9" >= ch) {
    return true;
  }
  if ("a" <= ch && "f" >= ch) {
    return true;
  }
  return "A" <= ch && "F" >= ch;
};
145
+
146
/**
 * Tells whether `ch` may begin an identifier: an ASCII letter or underscore.
 */
const isIdentStart = (ch: string): boolean => {
  if (ch === "_") {
    return true;
  }
  const isLower = "a" <= ch && "z" >= ch;
  const isUpper = "A" <= ch && "Z" >= ch;
  return isLower || isUpper;
};
152
+
153
/**
 * Tells whether `ch` may appear after the first character of an identifier:
 * anything accepted by `isIdentStart`, or a digit.
 */
const isIdentCont = (ch: string): boolean => {
  if (isDigit(ch)) {
    return true;
  }
  return isIdentStart(ch);
};
159
+
160
/**
 * Decodes a string of hexadecimal digits into bytes.
 *
 * @param hex - Hex digits only (upper or lower case), two per byte.
 * @returns The decoded bytes (empty for an empty string), or `undefined` when
 *   the length is odd or any character is not a hex digit.
 */
const hexToBytes = (hex: string): Uint8Array | undefined => {
  // Validate the whole string up front. `parseInt` alone is too lenient: it
  // stops at the first invalid character (`parseInt("1g", 16) === 1`) and
  // accepts signs and leading whitespace, so bad input would decode silently.
  if (hex.length % 2 !== 0 || !/^[0-9a-fA-F]*$/.test(hex)) {
    return undefined;
  }

  const bytes = new Uint8Array(hex.length / 2);
  for (let index = 0; index < bytes.length; index++) {
    bytes[index] = parseInt(hex.slice(index * 2, index * 2 + 2), 16);
  }
  return bytes;
};
177
+
178
/**
 * Lexer for dCBOR pattern expressions.
 *
 * Walks the input string by index and produces one token per call to
 * {@link Lexer.next}. All spans are expressed as string indices into the
 * original input.
 */
export class Lexer {
  /**
   * Fixed multi-character tokens, tried in order before anything else.
   * `...` must precede `..` so the longer form wins; both lex as `Ellipsis`.
   * The token objects are read-only and shared between results.
   */
  private static readonly MULTI_CHAR_TOKENS: ReadonlyArray<readonly [string, Token]> = [
    ["-Infinity", { type: "NegInfinity" }],
    ["...", { type: "Ellipsis" }],
    ["..", { type: "Ellipsis" }],
    [">=", { type: "GreaterThanOrEqual" }],
    ["<=", { type: "LessThanOrEqual" }],
    ["*?", { type: "RepeatZeroOrMoreLazy" }],
    ["*+", { type: "RepeatZeroOrMorePossessive" }],
    ["+?", { type: "RepeatOneOrMoreLazy" }],
    ["++", { type: "RepeatOneOrMorePossessive" }],
    ["??", { type: "RepeatZeroOrOneLazy" }],
    ["?+", { type: "RepeatZeroOrOnePossessive" }],
  ];

  /**
   * Single-character tokens, consulted after the multi-character table.
   * `{` is absent on purpose: it needs lookahead (see `parseBraceOpen`).
   */
  private static readonly SINGLE_CHAR_TOKENS: ReadonlyMap<string, Token> = new Map<string, Token>([
    ["&", { type: "And" }],
    ["|", { type: "Or" }],
    ["!", { type: "Not" }],
    ["*", { type: "RepeatZeroOrMore" }],
    ["+", { type: "RepeatOneOrMore" }],
    ["?", { type: "RepeatZeroOrOne" }],
    ["(", { type: "ParenOpen" }],
    [")", { type: "ParenClose" }],
    ["[", { type: "BracketOpen" }],
    ["]", { type: "BracketClose" }],
    ["}", { type: "BraceClose" }],
    [",", { type: "Comma" }],
    [":", { type: "Colon" }],
    [">", { type: "GreaterThan" }],
    ["<", { type: "LessThan" }],
  ]);

  readonly #input: string;
  #position: number;

  constructor(input: string) {
    this.#input = input;
    this.#position = 0;
  }

  /**
   * Creates a new lexer for the given input.
   */
  static new(input: string): Lexer {
    return new Lexer(input);
  }

  /**
   * Returns the full input string.
   */
  input(): string {
    return this.#input;
  }

  /**
   * Returns the current index into the input.
   */
  position(): number {
    return this.#position;
  }

  /**
   * Returns the not-yet-consumed part of the input.
   */
  remainder(): string {
    return this.#input.slice(this.#position);
  }

  /**
   * Returns the current character without consuming it, or `undefined` at
   * end of input.
   */
  peek(): string | undefined {
    return this.#input[this.#position];
  }

  /**
   * Returns the character `offset` positions ahead of the current one
   * without consuming anything, or `undefined` if that is out of range.
   */
  peekAt(offset: number): string | undefined {
    return this.#input[this.#position + offset];
  }

  /**
   * Consumes and returns the current character. At end of input nothing is
   * consumed and `undefined` is returned.
   */
  advance(): string | undefined {
    const ch = this.#input[this.#position];
    if (ch !== undefined) {
      this.#position++;
    }
    return ch;
  }

  /**
   * Moves the position forward by `n` without any bounds check.
   */
  bump(n: number): void {
    this.#position += n;
  }

  /**
   * Builds a span running from `start` to the current position.
   */
  spanFrom(start: number): Span {
    return span(start, this.#position);
  }

  /**
   * Advances past any run of whitespace at the current position.
   */
  skipWhitespace(): void {
    while (this.#position < this.#input.length && isWhitespace(this.#input[this.#position])) {
      this.#position++;
    }
  }

  /**
   * Checks whether the unconsumed input begins with `s`.
   */
  startsWith(s: string): boolean {
    // Compare in place: slicing the remainder for every probe copies the
    // rest of the input and makes tokenizing quadratic in the input length.
    return this.#input.startsWith(s, this.#position);
  }

  /**
   * Lexes the next token.
   *
   * @returns `undefined` at end of input, otherwise an `Ok` holding the
   *   token and its span, or an `Err` describing why lexing failed.
   */
  next(): Result<SpannedToken> | undefined {
    this.skipWhitespace();

    if (this.#position >= this.#input.length) {
      return undefined;
    }

    const start = this.#position;
    const ch = this.peek() ?? "";

    // Multi-character operators take priority over their one-character prefixes.
    for (const [text, token] of Lexer.MULTI_CHAR_TOKENS) {
      if (this.startsWith(text)) {
        this.bump(text.length);
        return Ok({ token, span: this.spanFrom(start) });
      }
    }

    const simple = Lexer.SINGLE_CHAR_TOKENS.get(ch);
    if (simple !== undefined) {
      this.advance();
      return Ok({ token: simple, span: this.spanFrom(start) });
    }

    switch (ch) {
      case "{": // Either a plain brace or the start of a `{n,m}` range.
        this.advance();
        return this.parseBraceOpen(start);
      case '"':
        this.advance();
        return this.parseString(start);
      case "'":
        this.advance();
        return this.parseSingleQuoted(start);
      case "/":
        this.advance();
        return this.parseRegex(start);
      case "@":
        this.advance();
        return this.parseGroupName(start);
    }

    // Hex string h'...' or hex regex h'/.../'
    if (ch === "h" && this.peekAt(1) === "'") {
      this.bump(2);
      if (this.peek() === "/") {
        this.advance();
        return this.parseHexRegex(start);
      }
      return this.parseHexString(start);
    }

    // Number literal; a leading minus only counts when a digit follows.
    if (isDigit(ch) || (ch === "-" && isDigit(this.peekAt(1) ?? ""))) {
      return this.parseNumber(start);
    }

    // Keyword, including the date'...' and digest'...' quoted forms.
    if (isIdentStart(ch)) {
      return this.parseIdentifierOrKeyword(start);
    }

    // Nothing matched: consume one character so the error span is non-empty.
    this.advance();
    return Err({ type: "UnrecognizedToken", span: this.spanFrom(start) });
  }

  /**
   * Lexes the rest of the input.
   *
   * @returns All tokens in order, or the first error encountered.
   */
  tokenize(): Result<SpannedToken[]> {
    const tokens: SpannedToken[] = [];

    for (let result = this.next(); result !== undefined; result = this.next()) {
      if (!result.ok) {
        return result as Result<SpannedToken[]>;
      }
      tokens.push(result.value);
    }

    return Ok(tokens);
  }

  /**
   * Decides what an already-consumed `{` means: the start of a range
   * quantifier such as `{1,5}`, or a plain opening brace.
   */
  private parseBraceOpen(start: number): Result<SpannedToken> {
    if (this.looksLikeRangePattern(this.remainder())) {
      return this.parseRange(start);
    }
    return Ok({ token: { type: "BraceOpen" }, span: this.spanFrom(start) });
  }

  /**
   * Checks whether `content` (the text after a `{`) begins like a range:
   * optional whitespace, one or more digits, optional whitespace, then `,`
   * or `}`. Digits followed by anything else, such as the `:` of a map
   * constraint, are not a range.
   */
  private looksLikeRangePattern(content: string): boolean {
    let i = 0;

    while (i < content.length && isWhitespace(content[i])) {
      i++;
    }

    const digitsStart = i;
    while (i < content.length && isDigit(content[i])) {
      i++;
    }
    if (i === digitsStart) {
      return false;
    }

    while (i < content.length && isWhitespace(content[i])) {
      i++;
    }

    const terminator = content[i];
    return terminator === "," || terminator === "}";
  }

  /**
   * Consumes a run of digits at the current position and returns it
   * (empty when the current character is not a digit).
   */
  private consumeDigits(): string {
    const from = this.#position;
    while (isDigit(this.peek() ?? "")) {
      this.advance();
    }
    return this.#input.slice(from, this.#position);
  }

  /**
   * Parses the body of a range quantifier after its `{`: `{n}`, `{n,}` or
   * `{n,m}`, optionally followed by `?` (lazy) or `+` (possessive).
   *
   * @returns A `Range` token, or `InvalidRange` when the syntax is wrong or
   *   the minimum exceeds the maximum.
   */
  private parseRange(start: number): Result<SpannedToken> {
    const invalid = (): Result<SpannedToken> =>
      Err({ type: "InvalidRange", span: this.spanFrom(start) });

    this.skipWhitespace();
    const minDigits = this.consumeDigits();
    if (minDigits.length === 0) {
      return invalid();
    }
    const min = parseInt(minDigits, 10);

    this.skipWhitespace();

    let max: number | undefined;
    const separator = this.peek();
    if (separator === ",") {
      this.advance();
      this.skipWhitespace();

      if (this.peek() === "}") {
        // Unbounded: {n,} — max stays undefined.
        this.advance();
      } else {
        // Bounded: {n,m}
        const maxDigits = this.consumeDigits();
        if (maxDigits.length === 0) {
          return invalid();
        }
        max = parseInt(maxDigits, 10);

        this.skipWhitespace();
        if (this.peek() !== "}") {
          return invalid();
        }
        this.advance();
      }
    } else if (separator === "}") {
      // Exact: {n}
      this.advance();
      max = min;
    } else {
      return invalid();
    }

    // The reluctance suffix is consumed before validation so that an error
    // span covers the whole quantifier.
    let reluctance = Reluctance.Greedy;
    const suffix = this.peek();
    if (suffix === "?") {
      this.advance();
      reluctance = Reluctance.Lazy;
    } else if (suffix === "+") {
      this.advance();
      reluctance = Reluctance.Possessive;
    }

    if (max !== undefined && min > max) {
      return invalid();
    }

    const quantifier =
      max !== undefined
        ? Quantifier.between(min, max, reluctance)
        : Quantifier.atLeast(min, reluctance);

    return Ok({ token: { type: "Range", quantifier }, span: this.spanFrom(start) });
  }

  /**
   * Reads the body of a quoted literal whose opening quote is already
   * consumed, up to and including the closing `quote`.
   *
   * Recognized escapes are `\<quote>`, `\\`, `\n`, `\r` and `\t`; any other
   * escape is kept verbatim, backslash included.
   *
   * @returns The unescaped text, or `undefined` if the input ends first.
   */
  private readQuotedBody(quote: string): string | undefined {
    let text = "";
    let escaped = false;

    while (this.#position < this.#input.length) {
      const ch = this.advance() ?? "";

      if (escaped) {
        switch (ch) {
          case quote:
            text += quote;
            break;
          case "\\":
            text += "\\";
            break;
          case "n":
            text += "\n";
            break;
          case "r":
            text += "\r";
            break;
          case "t":
            text += "\t";
            break;
          default:
            text += "\\";
            text += ch;
            break;
        }
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === quote) {
        return text;
      } else {
        text += ch;
      }
    }

    return undefined;
  }

  /**
   * Parses a double-quoted string literal (opening quote already consumed).
   */
  private parseString(start: number): Result<SpannedToken> {
    const value = this.readQuotedBody('"');
    if (value === undefined) {
      return Err({ type: "UnterminatedString", span: this.spanFrom(start) });
    }
    return Ok({ token: { type: "StringLiteral", value }, span: this.spanFrom(start) });
  }

  /**
   * Parses a single-quoted string (opening quote already consumed).
   */
  private parseSingleQuoted(start: number): Result<SpannedToken> {
    const value = this.readQuotedBody("'");
    if (value === undefined) {
      return Err({ type: "UnterminatedString", span: this.spanFrom(start) });
    }
    return Ok({ token: { type: "SingleQuoted", value }, span: this.spanFrom(start) });
  }

  /**
   * Reports whether `pattern` is accepted by the JavaScript `RegExp`
   * constructor.
   */
  private static isValidRegex(pattern: string): boolean {
    try {
      new RegExp(pattern);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Parses a `/.../` regex (opening slash already consumed). Escape
   * sequences are kept verbatim, so `\/` does not end the pattern.
   */
  private parseRegex(start: number): Result<SpannedToken> {
    let pattern = "";
    let escaped = false;

    while (this.#position < this.#input.length) {
      const ch = this.advance() ?? "";

      if (escaped) {
        pattern += ch;
        escaped = false;
      } else if (ch === "\\") {
        pattern += ch;
        escaped = true;
      } else if (ch === "/") {
        if (!Lexer.isValidRegex(pattern)) {
          return Err({ type: "InvalidRegex", span: this.spanFrom(start) });
        }
        return Ok({ token: { type: "Regex", pattern }, span: this.spanFrom(start) });
      } else {
        pattern += ch;
      }
    }

    return Err({ type: "UnterminatedRegex", span: this.spanFrom(start) });
  }

  /**
   * Parses a capture group name after its `@`: an identifier-start
   * character followed by any number of identifier characters.
   */
  private parseGroupName(start: number): Result<SpannedToken> {
    const nameStart = this.#position;

    if (!isIdentStart(this.peek() ?? "")) {
      return Err({ type: "InvalidCaptureGroupName", name: "", span: this.spanFrom(start) });
    }

    while (isIdentCont(this.peek() ?? "")) {
      this.advance();
    }

    const name = this.#input.slice(nameStart, this.#position);
    return Ok({ token: { type: "GroupName", name }, span: this.spanFrom(start) });
  }

  /**
   * Parses the digits of a hex string after its `h'` prefix, up to the
   * closing `'`. Any non-hex character, or an odd digit count, is an
   * `InvalidHexString`.
   */
  private parseHexString(start: number): Result<SpannedToken> {
    let hex = "";

    while (this.#position < this.#input.length) {
      const ch = this.peek() ?? "";

      if (ch === "'") {
        this.advance();
        const bytes = hexToBytes(hex);
        if (bytes === undefined) {
          return Err({ type: "InvalidHexString", span: this.spanFrom(start) });
        }
        return Ok({ token: { type: "HexString", value: bytes }, span: this.spanFrom(start) });
      }

      if (!isHexDigit(ch)) {
        // The offending character is left unconsumed.
        return Err({ type: "InvalidHexString", span: this.spanFrom(start) });
      }

      hex += ch;
      this.advance();
    }

    return Err({ type: "UnterminatedHexString", span: this.spanFrom(start) });
  }

  /**
   * Parses a hex regex after its `h'/` prefix. The pattern ends at an
   * unescaped `/` that is immediately followed by `'`; any other `/` is
   * part of the pattern.
   */
  private parseHexRegex(start: number): Result<SpannedToken> {
    let pattern = "";
    let escaped = false;

    while (this.#position < this.#input.length) {
      const ch = this.advance() ?? "";

      if (escaped) {
        pattern += ch;
        escaped = false;
      } else if (ch === "\\") {
        pattern += ch;
        escaped = true;
      } else if (ch === "/" && this.peek() === "'") {
        this.advance();
        if (!Lexer.isValidRegex(pattern)) {
          return Err({ type: "InvalidRegex", span: this.spanFrom(start) });
        }
        return Ok({ token: { type: "HexRegex", pattern }, span: this.spanFrom(start) });
      } else {
        pattern += ch;
      }
    }

    return Err({ type: "UnterminatedRegex", span: this.spanFrom(start) });
  }

  /**
   * Parses a number literal: optional `-`, an integer part (a lone `0` or a
   * run of digits), an optional fraction and an optional exponent.
   */
  private parseNumber(start: number): Result<SpannedToken> {
    const invalid = (): Result<SpannedToken> =>
      Err({ type: "InvalidNumberFormat", span: this.spanFrom(start) });
    const numStart = this.#position;

    if (this.peek() === "-") {
      this.advance();
    }

    // Integer part: a leading zero stands alone, so "007" stops after "0".
    if (this.peek() === "0") {
      this.advance();
    } else if (this.consumeDigits().length === 0) {
      return invalid();
    }

    // Fractional part — but leave "1..10" alone, where the dots are an ellipsis.
    if (this.peek() === "." && this.peekAt(1) !== ".") {
      this.advance();
      if (this.consumeDigits().length === 0) {
        return invalid();
      }
    }

    // Exponent part
    if (this.peek() === "e" || this.peek() === "E") {
      this.advance();
      if (this.peek() === "+" || this.peek() === "-") {
        this.advance();
      }
      if (this.consumeDigits().length === 0) {
        return invalid();
      }
    }

    const value = parseFloat(this.#input.slice(numStart, this.#position));

    // Literals that overflow to ±Infinity are rejected.
    if (!Number.isFinite(value)) {
      return invalid();
    }

    return Ok({ token: { type: "NumberLiteral", value }, span: this.spanFrom(start) });
  }

  /**
   * Parses an identifier and maps it to a keyword token. `date'` and
   * `digest'` switch to their quoted forms; any identifier that is not a
   * keyword is an `UnrecognizedToken`.
   */
  private parseIdentifierOrKeyword(start: number): Result<SpannedToken> {
    const identStart = this.#position;

    while (isIdentCont(this.peek() ?? "")) {
      this.advance();
    }

    const ident = this.#input.slice(identStart, this.#position);

    if (ident === "date" && this.peek() === "'") {
      this.advance();
      return this.parseDateQuoted(start);
    }

    if (ident === "digest" && this.peek() === "'") {
      this.advance();
      return this.parseDigestQuoted(start);
    }

    // Require an own property: identifiers such as "constructor" or
    // "toString" must not resolve to members inherited from Object.prototype.
    if (Object.prototype.hasOwnProperty.call(KEYWORDS, ident)) {
      const keyword = KEYWORDS[ident];
      if (keyword !== undefined) {
        return Ok({ token: keyword, span: this.spanFrom(start) });
      }
    }

    return Err({ type: "UnrecognizedToken", span: this.spanFrom(start) });
  }

  /**
   * Consumes everything up to and including the next `'` and returns the
   * text before it. If there is no `'`, consumes the rest of the input and
   * returns `undefined`. No escape processing is done.
   */
  private readUntilApostrophe(): string | undefined {
    const end = this.#input.indexOf("'", this.#position);
    if (end === -1) {
      this.#position = Math.max(this.#position, this.#input.length);
      return undefined;
    }
    const content = this.#input.slice(this.#position, end);
    this.#position = end + 1;
    return content;
  }

  /**
   * Parses the content of `date'...'` after the opening quote. Empty
   * content is an `InvalidDateFormat`.
   */
  private parseDateQuoted(start: number): Result<SpannedToken> {
    const content = this.readUntilApostrophe();
    if (content === undefined) {
      return Err({ type: "UnterminatedDateQuoted", span: this.spanFrom(start) });
    }
    if (content.length === 0) {
      return Err({ type: "InvalidDateFormat", span: this.spanFrom(start) });
    }
    return Ok({ token: { type: "DateQuoted", value: content }, span: this.spanFrom(start) });
  }

  /**
   * Parses the content of `digest'...'` after the opening quote. Empty
   * content is an `InvalidDigestPattern`.
   */
  private parseDigestQuoted(start: number): Result<SpannedToken> {
    const content = this.readUntilApostrophe();
    if (content === undefined) {
      return Err({ type: "UnterminatedDigestQuoted", span: this.spanFrom(start) });
    }
    if (content.length === 0) {
      return Err({
        type: "InvalidDigestPattern",
        message: "empty content",
        span: this.spanFrom(start),
      });
    }
    return Ok({ token: { type: "DigestQuoted", value: content }, span: this.spanFrom(start) });
  }

  /**
   * Lexes the next token and then restores the position, so nothing is
   * consumed.
   *
   * @returns `undefined` at end of input, otherwise the token or the
   *   lexing error.
   */
  peekToken(): Result<Token> | undefined {
    const savedPosition = this.#position;
    const result = this.next();
    this.#position = savedPosition;

    if (result === undefined) {
      return undefined;
    }

    if (!result.ok) {
      return result;
    }

    return Ok(result.value.token);
  }

  /**
   * Returns an empty span at the current position.
   */
  span(): Span {
    return span(this.#position, this.#position);
  }

  /**
   * Returns an empty span at the current position.
   *
   * This is a simplification: the span of the last token is not tracked.
   */
  lastSpan(): Span {
    return span(this.#position, this.#position);
  }
}
995
+
996
+ // Re-export Span
997
+ export type { Span };