@bcts/envelope-pattern 1.0.0-alpha.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53):
  1. package/LICENSE +48 -0
  2. package/README.md +13 -0
  3. package/dist/index.cjs +6781 -0
  4. package/dist/index.cjs.map +1 -0
  5. package/dist/index.d.cts +2628 -0
  6. package/dist/index.d.cts.map +1 -0
  7. package/dist/index.d.mts +2628 -0
  8. package/dist/index.d.mts.map +1 -0
  9. package/dist/index.iife.js +6781 -0
  10. package/dist/index.iife.js.map +1 -0
  11. package/dist/index.mjs +6545 -0
  12. package/dist/index.mjs.map +1 -0
  13. package/package.json +77 -0
  14. package/src/error.ts +262 -0
  15. package/src/format.ts +375 -0
  16. package/src/index.ts +27 -0
  17. package/src/parse/index.ts +923 -0
  18. package/src/parse/token.ts +906 -0
  19. package/src/parse/utils.ts +339 -0
  20. package/src/pattern/index.ts +719 -0
  21. package/src/pattern/leaf/array-pattern.ts +273 -0
  22. package/src/pattern/leaf/bool-pattern.ts +140 -0
  23. package/src/pattern/leaf/byte-string-pattern.ts +172 -0
  24. package/src/pattern/leaf/cbor-pattern.ts +355 -0
  25. package/src/pattern/leaf/date-pattern.ts +178 -0
  26. package/src/pattern/leaf/index.ts +280 -0
  27. package/src/pattern/leaf/known-value-pattern.ts +192 -0
  28. package/src/pattern/leaf/map-pattern.ts +152 -0
  29. package/src/pattern/leaf/null-pattern.ts +110 -0
  30. package/src/pattern/leaf/number-pattern.ts +248 -0
  31. package/src/pattern/leaf/tagged-pattern.ts +228 -0
  32. package/src/pattern/leaf/text-pattern.ts +165 -0
  33. package/src/pattern/matcher.ts +88 -0
  34. package/src/pattern/meta/and-pattern.ts +109 -0
  35. package/src/pattern/meta/any-pattern.ts +81 -0
  36. package/src/pattern/meta/capture-pattern.ts +111 -0
  37. package/src/pattern/meta/group-pattern.ts +110 -0
  38. package/src/pattern/meta/index.ts +269 -0
  39. package/src/pattern/meta/not-pattern.ts +91 -0
  40. package/src/pattern/meta/or-pattern.ts +146 -0
  41. package/src/pattern/meta/search-pattern.ts +201 -0
  42. package/src/pattern/meta/traverse-pattern.ts +146 -0
  43. package/src/pattern/structure/assertions-pattern.ts +244 -0
  44. package/src/pattern/structure/digest-pattern.ts +225 -0
  45. package/src/pattern/structure/index.ts +272 -0
  46. package/src/pattern/structure/leaf-structure-pattern.ts +85 -0
  47. package/src/pattern/structure/node-pattern.ts +188 -0
  48. package/src/pattern/structure/object-pattern.ts +149 -0
  49. package/src/pattern/structure/obscured-pattern.ts +159 -0
  50. package/src/pattern/structure/predicate-pattern.ts +151 -0
  51. package/src/pattern/structure/subject-pattern.ts +152 -0
  52. package/src/pattern/structure/wrapped-pattern.ts +195 -0
  53. package/src/pattern/vm.ts +1021 -0
@@ -0,0 +1,906 @@
1
+ /**
2
+ * @bcts/envelope-pattern - Token types and Lexer
3
+ *
4
+ * This is a 1:1 TypeScript port of bc-envelope-pattern-rust token.rs
5
+ * Uses a manual lexer implementation instead of logos.
6
+ *
7
+ * @module envelope-pattern/parse/token
8
+ */
9
+
10
+ import { Quantifier, Reluctance } from "@bcts/dcbor-pattern";
11
+ import {
12
+ type Span,
13
+ type Result,
14
+ type EnvelopePatternError,
15
+ ok,
16
+ err,
17
+ invalidRegex,
18
+ unterminatedRegex,
19
+ invalidRange,
20
+ invalidHexString,
21
+ unexpectedEndOfInput,
22
+ invalidNumberFormat,
23
+ } from "../error";
24
+
25
/**
 * Names of the tokens that carry no payload: operators, keywords,
 * delimiters and the special numeric literals.
 */
type BareTokenKind =
  // Meta pattern operators
  | "And"
  | "Or"
  | "Not"
  | "Traverse"
  | "RepeatZeroOrMore"
  | "RepeatZeroOrMoreLazy"
  | "RepeatZeroOrMorePossessive"
  | "RepeatOneOrMore"
  | "RepeatOneOrMoreLazy"
  | "RepeatOneOrMorePossessive"
  | "RepeatZeroOrOne"
  | "RepeatZeroOrOneLazy"
  | "RepeatZeroOrOnePossessive"
  // Structure pattern keywords
  | "Assertion"
  | "AssertionPred"
  | "AssertionObj"
  | "Digest"
  | "Node"
  | "Obj"
  | "Obscured"
  | "Elided"
  | "Encrypted"
  | "Compressed"
  | "Pred"
  | "Subject"
  | "Wrapped"
  | "Unwrap"
  | "Search"
  // Leaf pattern keywords
  | "ByteString"
  | "Leaf"
  | "Cbor"
  | "DateKeyword"
  | "Known"
  | "Null"
  | "NumberKeyword"
  | "Tagged"
  // Special literals
  | "BoolKeyword"
  | "BoolTrue"
  | "BoolFalse"
  | "TextKeyword"
  | "NaN"
  // Grouping and range delimiters
  | "ParenOpen"
  | "ParenClose"
  | "BracketOpen"
  | "BracketClose"
  | "Comma"
  | "Ellipsis"
  | "GreaterThanOrEqual"
  | "LessThanOrEqual"
  | "GreaterThan"
  | "LessThan"
  // Non-finite numbers
  | "Infinity"
  | "NegativeInfinity";

/**
 * Tokens whose payload is a `Result` stored under `value`, keyed by token
 * name and mapped to the type wrapped by that `Result`.
 */
interface ValueTokenPayloads {
  StringLiteral: string;
  Integer: number;
  UnsignedInteger: number;
  Float: number;
  Regex: string;
  HexPattern: Uint8Array;
  HexBinaryRegex: string;
  DatePattern: string;
  Range: Quantifier;
  SingleQuotedPattern: string;
  SingleQuotedRegex: string;
}

/**
 * Token types for the Gordian Envelope pattern syntax.
 *
 * A discriminated union on `type`. Most members carry nothing else; tokens
 * listed in `ValueTokenPayloads` add a `value` holding a `Result`, and
 * `GroupName` adds the captured `name`.
 *
 * Corresponds to the Rust `Token` enum in token.rs
 */
export type Token =
  | { [K in BareTokenKind]: { readonly type: K } }[BareTokenKind]
  | {
      [K in keyof ValueTokenPayloads]: {
        readonly type: K;
        readonly value: Result<ValueTokenPayloads[K]>;
      };
    }[keyof ValueTokenPayloads]
  | { readonly type: "GroupName"; readonly name: string };
103
+
104
/**
 * Spelling/token pairs used to populate {@link KEYWORDS}.
 */
const KEYWORD_ENTRIES: ReadonlyArray<readonly [string, Token]> = [
  // Meta pattern operators
  ["&", { type: "And" }],
  ["|", { type: "Or" }],
  ["!", { type: "Not" }],

  // Structure pattern keywords
  ["assert", { type: "Assertion" }],
  ["assertpred", { type: "AssertionPred" }],
  ["assertobj", { type: "AssertionObj" }],
  ["digest", { type: "Digest" }],
  ["node", { type: "Node" }],
  ["obj", { type: "Obj" }],
  ["obscured", { type: "Obscured" }],
  ["elided", { type: "Elided" }],
  ["encrypted", { type: "Encrypted" }],
  ["compressed", { type: "Compressed" }],
  ["pred", { type: "Pred" }],
  ["subj", { type: "Subject" }],
  ["wrapped", { type: "Wrapped" }],
  ["unwrap", { type: "Unwrap" }],
  ["search", { type: "Search" }],

  // Leaf pattern keywords
  ["bstr", { type: "ByteString" }],
  ["leaf", { type: "Leaf" }],
  ["cbor", { type: "Cbor" }],
  ["date", { type: "DateKeyword" }],
  ["known", { type: "Known" }],
  ["null", { type: "Null" }],
  ["number", { type: "NumberKeyword" }],
  ["tagged", { type: "Tagged" }],

  // Special literals
  ["bool", { type: "BoolKeyword" }],
  ["true", { type: "BoolTrue" }],
  ["false", { type: "BoolFalse" }],
  ["text", { type: "TextKeyword" }],
  ["NaN", { type: "NaN" }],
  ["Infinity", { type: "Infinity" }],
  ["-Infinity", { type: "NegativeInfinity" }],
];

/**
 * Lookup table from a keyword's source spelling to the token it produces.
 */
const KEYWORDS = new Map<string, Token>(KEYWORD_ENTRIES);
146
+
147
/**
 * Reports whether `ch` is one of the whitespace characters the lexer skips:
 * space, tab, line feed, carriage return or form feed.
 */
function isWhitespace(ch: string): boolean {
  switch (ch) {
    case " ":
    case "\t":
    case "\n":
    case "\r":
    case "\f":
      return true;
    default:
      return false;
  }
}
153
+
154
/**
 * Reports whether `ch` may begin an identifier: an ASCII letter or `_`.
 */
function isIdentStart(ch: string): boolean {
  if (ch === "_") {
    return true;
  }
  const isLowercase = ch >= "a" && ch <= "z";
  const isUppercase = ch >= "A" && ch <= "Z";
  return isLowercase || isUppercase;
}
160
+
161
/**
 * Reports whether `ch` may appear after the first character of an
 * identifier: anything accepted by `isIdentStart`, or an ASCII digit.
 */
function isIdentContinue(ch: string): boolean {
  if (isIdentStart(ch)) {
    return true;
  }
  return ch >= "0" && ch <= "9";
}
167
+
168
/**
 * Reports whether `ch` is an ASCII decimal digit (`0` through `9`).
 */
function isDigit(ch: string): boolean {
  const outsideDigitRange = ch < "0" || ch > "9";
  return !outsideDigitRange;
}
174
+
175
/**
 * Reports whether `ch` is an ASCII hexadecimal digit in either case.
 */
function isHexDigit(ch: string): boolean {
  if (ch >= "0" && ch <= "9") {
    return true;
  }
  if (ch >= "a" && ch <= "f") {
    return true;
  }
  return ch >= "A" && ch <= "F";
}
181
+
182
/** Operators spelled with exactly two characters and carrying no payload. */
const TWO_CHAR_OPERATORS: ReadonlyMap<string, Token> = new Map<string, Token>([
  ["->", { type: "Traverse" }],
  ["*?", { type: "RepeatZeroOrMoreLazy" }],
  ["*+", { type: "RepeatZeroOrMorePossessive" }],
  ["+?", { type: "RepeatOneOrMoreLazy" }],
  ["++", { type: "RepeatOneOrMorePossessive" }],
  ["??", { type: "RepeatZeroOrOneLazy" }],
  ["?+", { type: "RepeatZeroOrOnePossessive" }],
  [">=", { type: "GreaterThanOrEqual" }],
  ["<=", { type: "LessThanOrEqual" }],
]);

/** Operators and delimiters spelled with a single character. */
const ONE_CHAR_OPERATORS: ReadonlyMap<string, Token> = new Map<string, Token>([
  ["&", { type: "And" }],
  ["|", { type: "Or" }],
  ["!", { type: "Not" }],
  ["*", { type: "RepeatZeroOrMore" }],
  ["+", { type: "RepeatOneOrMore" }],
  ["?", { type: "RepeatZeroOrOne" }],
  ["(", { type: "ParenOpen" }],
  [")", { type: "ParenClose" }],
  ["[", { type: "BracketOpen" }],
  ["]", { type: "BracketClose" }],
  [",", { type: "Comma" }],
  [">", { type: "GreaterThan" }],
  ["<", { type: "LessThan" }],
]);

/** Escape letters recognised inside double-quoted string literals. */
const STRING_ESCAPES: ReadonlyMap<string, string> = new Map<string, string>([
  ["n", "\n"],
  ["t", "\t"],
  ["r", "\r"],
  ["\\", "\\"],
  ['"', '"'],
]);

/**
 * Lexer for Gordian Envelope pattern syntax.
 *
 * Tokens are produced on demand by {@link Lexer.next}; one token of
 * lookahead is available through {@link Lexer.peekToken}.
 *
 * Caveats visible in this implementation:
 * - `next()` returns `undefined` both at end of input and when it meets an
 *   unknown identifier or character (which it consumes). Callers cannot tell
 *   these apart from the return value alone; compare `position` with the
 *   source length if the distinction matters.
 * - `peekToken()` lexes eagerly, so `position`, `span()` and `remainder()`
 *   already reflect the peeked token after it is called.
 */
export class Lexer {
  readonly #source: string;
  #position = 0;
  #tokenStart = 0;
  #peekedToken: { token: Token; span: Span } | undefined = undefined;

  /**
   * @param source - The pattern text to tokenize.
   */
  constructor(source: string) {
    this.#source = source;
  }

  /**
   * Gets the current position in the source.
   */
  get position(): number {
    return this.#position;
  }

  /**
   * Peeks at the next token without consuming it.
   *
   * The token is cached and handed out by the following `next()` call. An
   * `undefined` result is not cached, so the next call lexes again from the
   * current position.
   */
  peekToken(): { token: Token; span: Span } | undefined {
    if (this.#peekedToken === undefined) {
      this.#peekedToken = this.next();
    }
    return this.#peekedToken;
  }

  /**
   * Gets the current span (from token start to current position).
   */
  span(): Span {
    return { start: this.#tokenStart, end: this.#position };
  }

  /**
   * Gets the remaining source string.
   */
  remainder(): string {
    return this.#source.slice(this.#position);
  }

  /**
   * Peeks at the current character without consuming it.
   *
   * @returns The character at the current position, or `undefined` at end
   * of input.
   */
  peek(): string | undefined {
    return this.#position < this.#source.length ? this.#source[this.#position] : undefined;
  }

  /**
   * Peeks at the character after the current one without consuming anything.
   *
   * @returns That character, or `undefined` if it lies past the end.
   */
  peekNext(): string | undefined {
    const index = this.#position + 1;
    return index < this.#source.length ? this.#source[index] : undefined;
  }

  /**
   * Advances the position by `n` characters, clamped to the end of input.
   */
  bump(n = 1): void {
    this.#position = Math.min(this.#position + n, this.#source.length);
  }

  /**
   * Pairs a token with the span from the token start to the current position.
   */
  #spanned(token: Token): { token: Token; span: Span } {
    return { token, span: this.span() };
  }

  /**
   * Advances past any run of whitespace at the current position.
   */
  #skipWhitespace(): void {
    let c = this.peek();
    while (c !== undefined && isWhitespace(c)) {
      this.#position++;
      c = this.peek();
    }
  }

  /**
   * Advances past any run of decimal digits at the current position.
   */
  #skipDigits(): void {
    let c = this.peek();
    while (c !== undefined && isDigit(c)) {
      this.bump(1);
      c = this.peek();
    }
  }

  /**
   * Advances past any run of identifier characters at the current position.
   */
  #skipIdentChars(): void {
    let c = this.peek();
    while (c !== undefined && isIdentContinue(c)) {
      this.bump(1);
      c = this.peek();
    }
  }

  /**
   * Parses a string literal (after the opening quote).
   *
   * Recognised escapes are `\n`, `\t`, `\r`, `\\` and `\"`; any other escape
   * is kept verbatim, backslash included.
   *
   * @returns The decoded text, or an unexpected-end-of-input error when no
   * closing quote is found (nothing is consumed in that case).
   */
  #parseStringLiteral(): Result<string> {
    const src = this.remainder();
    let content = "";
    let escaped = false;

    for (let i = 0; i < src.length; i++) {
      const ch = src.charAt(i);
      if (escaped) {
        content += STRING_ESCAPES.get(ch) ?? `\\${ch}`;
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        this.bump(i + 1);
        return ok(content);
      } else {
        content += ch;
      }
    }

    return err(unexpectedEndOfInput());
  }

  /**
   * Scans a regex body up to the first unescaped `/` and validates it by
   * compiling it with `RegExp`.
   *
   * @param allowClosingQuote - When true, a `'` directly after the closing
   * `/` is consumed as well.
   * @returns The body text with its escapes left intact, or an
   * invalid/unterminated regex error.
   */
  #scanRegex(allowClosingQuote: boolean): Result<string> {
    const src = this.remainder();
    let escaped = false;

    for (let i = 0; i < src.length; i++) {
      const ch = src.charAt(i);
      if (escaped) {
        escaped = false;
        continue;
      }
      if (ch === "\\") {
        escaped = true;
        continue;
      }
      if (ch !== "/") {
        continue;
      }

      // Closing delimiter: consume it before validating so that the error
      // span covers the whole token.
      this.bump(i + 1);
      if (allowClosingQuote && this.peek() === "'") {
        this.bump(1);
      }
      const body = src.slice(0, i);
      try {
        new RegExp(body);
      } catch {
        return err(invalidRegex(this.span()));
      }
      return ok(body);
    }

    return err(unterminatedRegex(this.span()));
  }

  /**
   * Parses a regex pattern (after the opening slash).
   */
  #parseRegex(): Result<string> {
    return this.#scanRegex(false);
  }

  /**
   * Parses a hex binary regex (after h'/), including the optional closing '.
   */
  #parseHexBinaryRegex(): Result<string> {
    return this.#scanRegex(true);
  }

  /**
   * Parses a single quoted regex (after '/), including the optional closing '.
   */
  #parseSingleQuotedRegex(): Result<string> {
    return this.#scanRegex(true);
  }

  /**
   * Parses a hex pattern (after h').
   *
   * @returns The decoded bytes, or an invalid-hex error when a non-hex
   * character appears, the digit count is odd, or the closing `'` is missing.
   */
  #parseHexPattern(): Result<Uint8Array> {
    const src = this.remainder();

    for (let i = 0; i < src.length; i++) {
      const ch = src.charAt(i);

      if (ch === "'") {
        const hex = src.slice(0, i);
        this.bump(i + 1);

        if (hex.length % 2 !== 0) {
          return err(invalidHexString(this.span()));
        }

        // Every character before the quote has already passed isHexDigit,
        // so each two-character slice decodes to a valid byte.
        const bytes = new Uint8Array(hex.length / 2);
        for (let j = 0; j < bytes.length; j++) {
          bytes[j] = parseInt(hex.slice(2 * j, 2 * j + 2), 16);
        }
        return ok(bytes);
      }

      if (!isHexDigit(ch)) {
        return err(invalidHexString(this.span()));
      }
    }

    return err(invalidHexString(this.span()));
  }

  /**
   * Reads raw text up to the next `'` and consumes the quote.
   *
   * @returns The text before the quote, or an unterminated-regex error
   * (the error kind the original quote scanners used) when no quote follows.
   */
  #readUntilQuote(): Result<string> {
    const src = this.remainder();
    const end = src.indexOf("'");
    if (end === -1) {
      return err(unterminatedRegex(this.span()));
    }
    this.bump(end + 1);
    return ok(src.slice(0, end));
  }

  /**
   * Parses a date pattern (after date').
   */
  #parseDatePattern(): Result<string> {
    return this.#readUntilQuote();
  }

  /**
   * Parses a single quoted pattern (after ').
   */
  #parseSingleQuotedPattern(): Result<string> {
    return this.#readUntilQuote();
  }

  /**
   * Parses a range pattern (after {).
   *
   * Accepted forms are `{n}`, `{n,}` and `{n,m}`, with optional whitespace
   * around the numbers, optionally followed by `?` (lazy) or `+`
   * (possessive).
   *
   * @returns The quantifier, or an invalid-range error. Input is consumed
   * only once the whole range has been read.
   */
  #parseRange(): Result<Quantifier> {
    const src = this.remainder();
    let pos = 0;

    const skipWhile = (accept: (ch: string) => boolean): void => {
      while (pos < src.length && accept(src.charAt(pos))) {
        pos++;
      }
    };
    const fail = (): Result<Quantifier> => err(invalidRange(this.span()));

    // Minimum bound (required).
    skipWhile(isWhitespace);
    const minStart = pos;
    skipWhile(isDigit);
    if (pos === minStart) {
      return fail();
    }
    const min = parseInt(src.slice(minStart, pos), 10);
    if (Number.isNaN(min)) {
      return fail();
    }
    skipWhile(isWhitespace);

    // Maximum bound: absent for `{n,}`, equal to min for `{n}`.
    let max: number | undefined;
    if (src[pos] === "}") {
      pos++;
      max = min;
    } else if (src[pos] === ",") {
      pos++;
      skipWhile(isWhitespace);
      if (src[pos] === "}") {
        pos++;
      } else {
        const maxStart = pos;
        skipWhile(isDigit);
        if (pos === maxStart) {
          return fail();
        }
        max = parseInt(src.slice(maxStart, pos), 10);
        if (Number.isNaN(max)) {
          return fail();
        }
        skipWhile(isWhitespace);
        if (src[pos] !== "}") {
          return fail();
        }
        pos++;
      }
    } else {
      return fail();
    }

    // Optional greediness suffix.
    let mode: Reluctance = Reluctance.Greedy;
    if (src[pos] === "?") {
      pos++;
      mode = Reluctance.Lazy;
    } else if (src[pos] === "+") {
      pos++;
      mode = Reluctance.Possessive;
    }

    this.bump(pos);

    if (max === undefined) {
      return ok(Quantifier.atLeast(min, mode));
    }
    if (min > max) {
      return fail();
    }
    return ok(Quantifier.between(min, max, mode));
  }

  /**
   * Parses a number (integer or float).
   *
   * A fractional part is consumed only when `.` is followed by a digit, and
   * an exponent only when `e`/`E` (plus optional sign) is followed by a
   * digit. Otherwise those characters are left for the next token, so input
   * such as `1e` is no longer silently accepted as the float `1`.
   *
   * @returns A `Float` token, an `Integer` token when a leading `-` is
   * present, or an `UnsignedInteger` token otherwise.
   */
  #parseNumber(): Token {
    const start = this.#position;
    let isFloat = false;

    const isNegative = this.peek() === "-";
    if (isNegative) {
      this.bump(1);
    }

    // Integer part.
    this.#skipDigits();

    // Fractional part.
    const afterDot = this.peekNext();
    if (this.peek() === "." && afterDot !== undefined && isDigit(afterDot)) {
      isFloat = true;
      this.bump(1); // consume '.'
      this.#skipDigits();
    }

    // Exponent: look ahead first and commit only if digits are present.
    const marker = this.peek();
    if (marker === "e" || marker === "E") {
      let digitsAt = this.#position + 1;
      const sign = this.#source[digitsAt];
      if (sign === "+" || sign === "-") {
        digitsAt++;
      }
      const firstDigit = this.#source[digitsAt];
      if (firstDigit !== undefined && isDigit(firstDigit)) {
        isFloat = true;
        this.#position = digitsAt;
        this.#skipDigits();
      }
    }

    const text = this.#source.slice(start, this.#position);

    if (isFloat) {
      const value = parseFloat(text);
      if (Number.isNaN(value)) {
        return { type: "Float", value: err(invalidNumberFormat(this.span())) };
      }
      return { type: "Float", value: ok(value) };
    }

    const value = parseInt(text, 10);
    if (Number.isNaN(value)) {
      return { type: "Integer", value: err(invalidNumberFormat(this.span())) };
    }
    if (isNegative) {
      return { type: "Integer", value: ok(value) };
    }
    return { type: "UnsignedInteger", value: ok(value) };
  }

  /**
   * Gets the next token from the input.
   *
   * @returns The token and its span, or `undefined` at end of input and
   * also when an unknown identifier or character is met (that input is
   * consumed).
   */
  next(): { token: Token; span: Span } | undefined {
    // Hand out the token cached by peekToken(), if any.
    if (this.#peekedToken !== undefined) {
      const peeked = this.#peekedToken;
      this.#peekedToken = undefined;
      return peeked;
    }

    this.#skipWhitespace();
    this.#tokenStart = this.#position;

    const ch = this.peek();
    if (ch === undefined) {
      return undefined;
    }

    // Longest fixed spellings first.
    if (this.#source.startsWith("...", this.#position)) {
      this.bump(3);
      return this.#spanned({ type: "Ellipsis" });
    }
    if (this.#source.startsWith("-Infinity", this.#position)) {
      this.bump(9);
      return this.#spanned({ type: "NegativeInfinity" });
    }

    // Two-character operators and prefixes.
    const twoChar = this.#source.slice(this.#position, this.#position + 2);
    const twoCharOperator = TWO_CHAR_OPERATORS.get(twoChar);
    if (twoCharOperator !== undefined) {
      this.bump(2);
      return this.#spanned(twoCharOperator);
    }
    if (twoChar === "h'") {
      this.bump(2);
      // h'/…/' is a binary regex; anything else is literal hex.
      if (this.peek() === "/") {
        this.bump(1);
        return this.#spanned({ type: "HexBinaryRegex", value: this.#parseHexBinaryRegex() });
      }
      return this.#spanned({ type: "HexPattern", value: this.#parseHexPattern() });
    }
    if (twoChar === "'/") {
      this.bump(2);
      return this.#spanned({ type: "SingleQuotedRegex", value: this.#parseSingleQuotedRegex() });
    }

    // Single-character operators and delimiters.
    const oneCharOperator = ONE_CHAR_OPERATORS.get(ch);
    if (oneCharOperator !== undefined) {
      this.bump(1);
      return this.#spanned(oneCharOperator);
    }

    // Single-character openers of payload-carrying tokens.
    switch (ch) {
      case '"':
        this.bump(1);
        return this.#spanned({ type: "StringLiteral", value: this.#parseStringLiteral() });
      case "/":
        this.bump(1);
        return this.#spanned({ type: "Regex", value: this.#parseRegex() });
      case "{":
        this.bump(1);
        return this.#spanned({ type: "Range", value: this.#parseRange() });
      case "'":
        this.bump(1);
        return this.#spanned({
          type: "SingleQuotedPattern",
          value: this.#parseSingleQuotedPattern(),
        });
      case "@": {
        // Group name; an empty name signals that no identifier followed '@'.
        this.bump(1);
        const nameStart = this.#position;
        const first = this.peek();
        if (first !== undefined && isIdentStart(first)) {
          this.#skipIdentChars();
        }
        const name = this.#source.slice(nameStart, this.#position);
        return this.#spanned({ type: "GroupName", name });
      }
    }

    // date'…' must be tried before the identifier path claims "date".
    if (this.#source.startsWith("date'", this.#position)) {
      this.bump(5);
      return this.#spanned({ type: "DatePattern", value: this.#parseDatePattern() });
    }

    // Numbers, including a leading minus directly followed by a digit.
    const nextChar = this.peekNext();
    if (isDigit(ch) || (ch === "-" && nextChar !== undefined && isDigit(nextChar))) {
      return this.#spanned(this.#parseNumber());
    }

    // Identifiers: only known keywords yield a token.
    if (isIdentStart(ch)) {
      const identStart = this.#position;
      this.#skipIdentChars();
      const keyword = KEYWORDS.get(this.#source.slice(identStart, this.#position));
      if (keyword === undefined) {
        // Unknown identifier - consumed and reported as undefined.
        return undefined;
      }
      return this.#spanned(keyword);
    }

    // Unknown character - consumed and reported as undefined.
    this.bump(1);
    return undefined;
  }

  /**
   * Iterates over tokens until `next()` first returns `undefined`.
   *
   * As written, only `{ token, span }` items are ever yielded; the
   * `{ error, span }` member of the declared element type is never produced.
   */
  *[Symbol.iterator](): Iterator<
    { token: Token; span: Span } | { error: EnvelopePatternError; span: Span }
  > {
    for (let item = this.next(); item !== undefined; item = this.next()) {
      yield item;
    }
  }
}
900
+
901
/**
 * Convenience factory: builds a `Lexer` positioned at the start of `source`.
 *
 * @param source - The pattern text to tokenize.
 * @returns A fresh lexer over `source`.
 */
export function lexer(source: string): Lexer {
  const instance = new Lexer(source);
  return instance;
}