@bcts/envelope-pattern 1.0.0-alpha.22 → 1.0.0-beta.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/dist/index.cjs +1291 -826
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +101 -59
  4. package/dist/index.d.cts.map +1 -1
  5. package/dist/index.d.mts +102 -60
  6. package/dist/index.d.mts.map +1 -1
  7. package/dist/index.iife.js +1643 -1179
  8. package/dist/index.iife.js.map +1 -1
  9. package/dist/index.mjs +1315 -853
  10. package/dist/index.mjs.map +1 -1
  11. package/package.json +13 -11
  12. package/src/error.ts +1 -1
  13. package/src/format.ts +19 -31
  14. package/src/parse/index.ts +17 -1010
  15. package/src/parse/leaf/array-parser.ts +36 -0
  16. package/src/parse/leaf/cbor-parser.ts +43 -0
  17. package/src/parse/leaf/date-parser.ts +81 -0
  18. package/src/parse/leaf/known-value-parser.ts +73 -0
  19. package/src/parse/leaf/null-parser.ts +16 -0
  20. package/src/parse/leaf/number-parser.ts +90 -0
  21. package/src/parse/leaf/tag-parser.ts +160 -0
  22. package/src/parse/meta/and-parser.ts +40 -0
  23. package/src/parse/meta/capture-parser.ts +50 -0
  24. package/src/parse/meta/group-parser.ts +77 -0
  25. package/src/parse/meta/not-parser.ts +30 -0
  26. package/src/parse/meta/or-parser.ts +36 -0
  27. package/src/parse/meta/primary-parser.ts +234 -0
  28. package/src/parse/meta/search-parser.ts +41 -0
  29. package/src/parse/meta/traverse-parser.ts +42 -0
  30. package/src/parse/structure/assertion-obj-parser.ts +44 -0
  31. package/src/parse/structure/assertion-parser.ts +22 -0
  32. package/src/parse/structure/assertion-pred-parser.ts +45 -0
  33. package/src/parse/structure/compressed-parser.ts +17 -0
  34. package/src/parse/structure/digest-parser.ts +132 -0
  35. package/src/parse/structure/elided-parser.ts +17 -0
  36. package/src/parse/structure/encrypted-parser.ts +17 -0
  37. package/src/parse/structure/node-parser.ts +54 -0
  38. package/src/parse/structure/object-parser.ts +32 -0
  39. package/src/parse/structure/obscured-parser.ts +17 -0
  40. package/src/parse/structure/predicate-parser.ts +32 -0
  41. package/src/parse/structure/subject-parser.ts +32 -0
  42. package/src/parse/structure/wrapped-parser.ts +36 -0
  43. package/src/pattern/dcbor-integration.ts +40 -8
  44. package/src/pattern/index.ts +29 -0
  45. package/src/pattern/leaf/array-pattern.ts +67 -169
  46. package/src/pattern/leaf/cbor-pattern.ts +37 -23
  47. package/src/pattern/leaf/index.ts +1 -1
  48. package/src/pattern/leaf/map-pattern.ts +21 -2
  49. package/src/pattern/leaf/tagged-pattern.ts +6 -1
  50. package/src/pattern/meta/search-pattern.ts +13 -38
  51. package/src/pattern/meta/traverse-pattern.ts +2 -2
  52. package/src/pattern/structure/assertions-pattern.ts +19 -53
  53. package/src/pattern/structure/digest-pattern.ts +18 -22
  54. package/src/pattern/structure/index.ts +3 -0
  55. package/src/pattern/structure/node-pattern.ts +10 -29
  56. package/src/pattern/structure/object-pattern.ts +2 -2
  57. package/src/pattern/structure/predicate-pattern.ts +2 -2
  58. package/src/pattern/structure/subject-pattern.ts +31 -4
  59. package/src/pattern/structure/wrapped-pattern.ts +28 -9
  60. package/src/pattern/vm.ts +4 -4
@@ -5,129 +5,44 @@
5
5
  *
6
6
  * @bcts/envelope-pattern - Parser entry point
7
7
  *
8
- * This is a 1:1 TypeScript port of bc-envelope-pattern-rust parse/mod.rs
9
- * Recursive descent parser for Gordian Envelope pattern syntax.
8
+ * This is a 1:1 TypeScript port of bc-envelope-pattern-rust parse/mod.rs.
9
+ *
10
+ * Recursive-descent parser for the Gordian Envelope pattern syntax. The
11
+ * parsing rules live under `parse/leaf/`, `parse/meta/`, and
12
+ * `parse/structure/`, mirroring the Rust crate's module layout.
10
13
  *
11
14
  * @module envelope-pattern/parse
12
15
  */
13
16
 
14
17
  import { parse as parseDcborPattern } from "@bcts/dcbor-pattern";
15
- import { parseDcborItemPartial } from "@bcts/dcbor-parse";
18
+ import { type Result, err, extraData, ok } from "../error";
19
+ import { type Pattern, convertDcborPatternToEnvelopePattern } from "../pattern";
16
20
  import { Lexer } from "./token";
17
- import {
18
- type Result,
19
- type Span,
20
- ok,
21
- err,
22
- unexpectedEndOfInput,
23
- extraData,
24
- invalidRegex,
25
- unexpectedToken,
26
- } from "../error";
27
- import {
28
- type Pattern,
29
- // Leaf pattern constructors
30
- any,
31
- anyBool,
32
- bool,
33
- anyText,
34
- text,
35
- textRegex,
36
- anyNumber,
37
- number,
38
- numberRange,
39
- numberGreaterThan,
40
- numberLessThan,
41
- anyByteString,
42
- byteString,
43
- anyDate,
44
- date,
45
- dateRange,
46
- dateEarliest,
47
- dateLatest,
48
- dateRegex,
49
- anyKnownValue,
50
- knownValue,
51
- anyArray,
52
- anyTag,
53
- anyCbor,
54
- cborValue,
55
- cborPattern,
56
- nullPattern,
57
- // Structure pattern constructors
58
- leaf,
59
- anyAssertion,
60
- assertionWithPredicate,
61
- assertionWithObject,
62
- anySubject,
63
- subject,
64
- anyPredicate,
65
- predicate,
66
- anyObject,
67
- object,
68
- digestPrefix,
69
- anyNode,
70
- obscured,
71
- elided,
72
- encrypted,
73
- compressed,
74
- wrapped,
75
- unwrapEnvelope,
76
- unwrapMatching,
77
- // Meta pattern constructors
78
- and,
79
- or,
80
- notMatching,
81
- capture,
82
- search,
83
- traverse,
84
- repeat,
85
- group,
86
- // Pattern types
87
- patternLeaf,
88
- patternStructure,
89
- // Specific pattern classes
90
- NumberPattern,
91
- ByteStringPattern,
92
- KnownValuePattern,
93
- ArrayPattern,
94
- DigestPattern,
95
- NodePattern,
96
- AssertionsPattern,
97
- leafNumber,
98
- leafByteString,
99
- leafKnownValue,
100
- leafArray,
101
- structureDigest,
102
- structureNode,
103
- structureAssertions,
104
- } from "../pattern";
105
- import { Quantifier, Reluctance } from "@bcts/dcbor-pattern";
106
- import { type KnownValue as KnownValueType } from "@bcts/known-values";
107
- import { CborDate } from "@bcts/dcbor";
21
+ import { parseOr } from "./meta/or-parser";
108
22
 
109
23
  // Re-export token types
110
24
  export { type Token, Lexer } from "./token";
111
25
 
112
26
  /**
113
27
  * Parse a pattern expression string into a Pattern.
28
+ *
29
+ * Mirrors Rust `Pattern::parse`: tries envelope-pattern parsing first;
30
+ * on failure falls back to dcbor-pattern parsing and converts the
31
+ * result into an envelope pattern via the
32
+ * `dcbor_integration::convert_dcbor_pattern_to_envelope_pattern` bridge.
114
33
  */
115
34
  export function parse(input: string): Result<Pattern> {
116
35
  const lexer = new Lexer(input);
117
36
 
118
- // Try envelope-pattern parsing first
119
37
  const result = parseOr(lexer);
120
38
  if (!result.ok) {
121
- // If envelope-pattern parsing failed, try dcbor-pattern as fallback
122
39
  const dcborResult = parseDcborPattern(input);
123
40
  if (dcborResult.ok) {
124
41
  return convertDcborPatternToEnvelopePattern(dcborResult.value);
125
42
  }
126
- // Both parsers failed, return the original envelope error
127
43
  return result;
128
44
  }
129
45
 
130
- // Check for extra data
131
46
  const next = lexer.next();
132
47
  if (next !== undefined) {
133
48
  return err(extraData(next.span));
@@ -138,923 +53,15 @@ export function parse(input: string): Result<Pattern> {
138
53
 
139
54
  /**
140
55
  * Parse a pattern, allowing extra data after the pattern.
56
+ *
57
+ * Returns the parsed pattern and the byte offset at which parsing
58
+ * stopped, mirroring `Pattern::parse_partial` in spirit.
141
59
  */
142
60
  export function parsePartial(input: string): Result<[Pattern, number]> {
143
61
  const lexer = new Lexer(input);
144
62
  const result = parseOr(lexer);
145
63
  if (!result.ok) {
146
- return result as Result<[Pattern, number]>;
64
+ return result;
147
65
  }
148
66
  return ok([result.value, lexer.position]);
149
67
  }
150
-
151
- /**
152
- * Convert a dcbor-pattern Pattern to an envelope-pattern Pattern.
153
- */
154
- function convertDcborPatternToEnvelopePattern(_dcborPattern: unknown): Result<Pattern> {
155
- // For now, wrap dcbor patterns as CBOR patterns
156
- // This is a simplified conversion - the dcbor pattern is matched by the any() pattern
157
- return ok(any());
158
- }
159
-
160
- // ============================================================================
161
- // Recursive Descent Parser
162
- // ============================================================================
163
-
164
- /**
165
- * Parse an Or expression: expr (| expr)*
166
- */
167
- function parseOr(lexer: Lexer): Result<Pattern> {
168
- const patterns: Pattern[] = [];
169
-
170
- const first = parseTraverse(lexer);
171
- if (!first.ok) return first;
172
- patterns.push(first.value);
173
-
174
- while (true) {
175
- const next = lexer.peekToken();
176
- if (next?.token.type !== "Or") {
177
- break;
178
- }
179
- lexer.next(); // consume the |
180
-
181
- const nextExpr = parseTraverse(lexer);
182
- if (!nextExpr.ok) return nextExpr;
183
- patterns.push(nextExpr.value);
184
- }
185
-
186
- if (patterns.length === 1) {
187
- return ok(patterns[0]);
188
- }
189
- return ok(or(patterns));
190
- }
191
-
192
- /**
193
- * Parse a Traverse expression: expr (-> expr)*
194
- */
195
- function parseTraverse(lexer: Lexer): Result<Pattern> {
196
- const patterns: Pattern[] = [];
197
-
198
- const first = parseAnd(lexer);
199
- if (!first.ok) return first;
200
- patterns.push(first.value);
201
-
202
- while (true) {
203
- const next = lexer.peekToken();
204
- if (next?.token.type !== "Traverse") {
205
- break;
206
- }
207
- lexer.next(); // consume the ->
208
-
209
- const nextExpr = parseAnd(lexer);
210
- if (!nextExpr.ok) return nextExpr;
211
- patterns.push(nextExpr.value);
212
- }
213
-
214
- if (patterns.length === 1) {
215
- return ok(patterns[0]);
216
- }
217
- return ok(traverse(patterns));
218
- }
219
-
220
- /**
221
- * Parse an And expression: expr (& expr)*
222
- */
223
- function parseAnd(lexer: Lexer): Result<Pattern> {
224
- const patterns: Pattern[] = [];
225
-
226
- const first = parseNot(lexer);
227
- if (!first.ok) return first;
228
- patterns.push(first.value);
229
-
230
- while (true) {
231
- const next = lexer.peekToken();
232
- if (next?.token.type !== "And") {
233
- break;
234
- }
235
- lexer.next(); // consume the &
236
-
237
- const nextExpr = parseNot(lexer);
238
- if (!nextExpr.ok) return nextExpr;
239
- patterns.push(nextExpr.value);
240
- }
241
-
242
- if (patterns.length === 1) {
243
- return ok(patterns[0]);
244
- }
245
- return ok(and(patterns));
246
- }
247
-
248
- /**
249
- * Parse a Not expression: !? group
250
- */
251
- function parseNot(lexer: Lexer): Result<Pattern> {
252
- const next = lexer.peekToken();
253
- if (next?.token.type === "Not") {
254
- lexer.next(); // consume the !
255
- const inner = parseGroup(lexer);
256
- if (!inner.ok) return inner;
257
- return ok(notMatching(inner.value));
258
- }
259
- return parseGroup(lexer);
260
- }
261
-
262
- /**
263
- * Parse a Group expression: primary quantifier?
264
- */
265
- function parseGroup(lexer: Lexer): Result<Pattern> {
266
- const primary = parsePrimary(lexer);
267
- if (!primary.ok) return primary;
268
-
269
- // Check for quantifier
270
- const next = lexer.peekToken();
271
- if (next === undefined) {
272
- return primary;
273
- }
274
-
275
- const tokenType = next.token.type;
276
- let quantifier: Quantifier | undefined;
277
-
278
- if (tokenType === "RepeatZeroOrMore") {
279
- lexer.next();
280
- quantifier = Quantifier.zeroOrMore(Reluctance.Greedy);
281
- } else if (tokenType === "RepeatZeroOrMoreLazy") {
282
- lexer.next();
283
- quantifier = Quantifier.zeroOrMore(Reluctance.Lazy);
284
- } else if (tokenType === "RepeatZeroOrMorePossessive") {
285
- lexer.next();
286
- quantifier = Quantifier.zeroOrMore(Reluctance.Possessive);
287
- } else if (tokenType === "RepeatOneOrMore") {
288
- lexer.next();
289
- quantifier = Quantifier.oneOrMore(Reluctance.Greedy);
290
- } else if (tokenType === "RepeatOneOrMoreLazy") {
291
- lexer.next();
292
- quantifier = Quantifier.oneOrMore(Reluctance.Lazy);
293
- } else if (tokenType === "RepeatOneOrMorePossessive") {
294
- lexer.next();
295
- quantifier = Quantifier.oneOrMore(Reluctance.Possessive);
296
- } else if (tokenType === "RepeatZeroOrOne") {
297
- lexer.next();
298
- quantifier = Quantifier.zeroOrOne(Reluctance.Greedy);
299
- } else if (tokenType === "RepeatZeroOrOneLazy") {
300
- lexer.next();
301
- quantifier = Quantifier.zeroOrOne(Reluctance.Lazy);
302
- } else if (tokenType === "RepeatZeroOrOnePossessive") {
303
- lexer.next();
304
- quantifier = Quantifier.zeroOrOne(Reluctance.Possessive);
305
- } else if (tokenType === "Range") {
306
- lexer.next();
307
- if (!next.token.value.ok) {
308
- return err(next.token.value.error);
309
- }
310
- quantifier = next.token.value.value;
311
- } else {
312
- // No quantifier found, return the primary expression as-is
313
- return primary;
314
- }
315
-
316
- return ok(repeat(primary.value, quantifier.min(), quantifier.max(), quantifier.reluctance()));
317
- }
318
-
319
- /**
320
- * Parse a primary expression (atoms and structure keywords).
321
- */
322
- function parsePrimary(lexer: Lexer): Result<Pattern> {
323
- const tokenResult = lexer.next();
324
- if (tokenResult === undefined) {
325
- return err(unexpectedEndOfInput());
326
- }
327
-
328
- const { token, span } = tokenResult;
329
-
330
- switch (token.type) {
331
- // Envelope-specific structure patterns
332
- case "Search":
333
- return parseSearch(lexer);
334
- case "Node":
335
- return parseNode(lexer);
336
- case "Assertion":
337
- return parseAssertion(lexer);
338
- case "AssertionPred":
339
- return parseAssertionPred(lexer);
340
- case "AssertionObj":
341
- return parseAssertionObj(lexer);
342
- case "Digest":
343
- return parseDigest(lexer);
344
- case "Obj":
345
- return parseObject(lexer);
346
- case "Obscured":
347
- return ok(obscured());
348
- case "Elided":
349
- return ok(elided());
350
- case "Encrypted":
351
- return ok(encrypted());
352
- case "Compressed":
353
- return ok(compressed());
354
- case "Pred":
355
- return parsePredicate(lexer);
356
- case "Subject":
357
- return parseSubject(lexer);
358
- case "Wrapped":
359
- return ok(wrapped());
360
- case "Unwrap":
361
- return parseUnwrap(lexer);
362
- case "Leaf":
363
- return ok(leaf());
364
-
365
- // Capture group
366
- case "GroupName":
367
- return parseCapture(lexer, token.name);
368
-
369
- // Grouping with parentheses
370
- case "ParenOpen":
371
- return parseParenGroup(lexer);
372
-
373
- // CBOR pattern
374
- case "Cbor":
375
- return parseCbor(lexer);
376
-
377
- // Simple patterns
378
- case "RepeatZeroOrMore":
379
- return ok(any()); // * means any
380
- case "BoolKeyword":
381
- return ok(anyBool());
382
- case "BoolTrue":
383
- return ok(bool(true));
384
- case "BoolFalse":
385
- return ok(bool(false));
386
- case "NumberKeyword":
387
- return ok(anyNumber());
388
- case "TextKeyword":
389
- return ok(anyText());
390
- case "StringLiteral":
391
- if (!token.value.ok) return err(token.value.error);
392
- return ok(text(token.value.value));
393
- case "UnsignedInteger":
394
- if (!token.value.ok) return err(token.value.error);
395
- return parseNumberRangeOrComparison(lexer, token.value.value);
396
- case "Integer":
397
- if (!token.value.ok) return err(token.value.error);
398
- return parseNumberRangeOrComparison(lexer, token.value.value);
399
- case "Float":
400
- if (!token.value.ok) return err(token.value.error);
401
- return parseNumberRangeOrComparison(lexer, token.value.value);
402
- case "GreaterThanOrEqual":
403
- return parseComparisonNumber(lexer, ">=");
404
- case "LessThanOrEqual":
405
- return parseComparisonNumber(lexer, "<=");
406
- case "GreaterThan":
407
- return parseComparisonNumber(lexer, ">");
408
- case "LessThan":
409
- return parseComparisonNumber(lexer, "<");
410
- case "NaN":
411
- return ok(patternLeaf(leafNumber(NumberPattern.nan())));
412
- case "Infinity":
413
- return ok(number(Infinity));
414
- case "NegativeInfinity":
415
- return ok(number(-Infinity));
416
- case "Regex":
417
- if (!token.value.ok) return err(token.value.error);
418
- try {
419
- return ok(textRegex(new RegExp(token.value.value)));
420
- } catch {
421
- return err(invalidRegex(span));
422
- }
423
- case "BracketOpen":
424
- return parseArray(lexer);
425
- case "ByteString":
426
- return ok(anyByteString());
427
- case "HexPattern":
428
- if (!token.value.ok) return err(token.value.error);
429
- return ok(byteString(token.value.value));
430
- case "HexBinaryRegex":
431
- if (!token.value.ok) return err(token.value.error);
432
- try {
433
- return ok(
434
- patternLeaf(leafByteString(ByteStringPattern.regex(new RegExp(token.value.value)))),
435
- );
436
- } catch {
437
- return err(invalidRegex(span));
438
- }
439
- case "DateKeyword":
440
- return ok(anyDate());
441
- case "DatePattern":
442
- if (!token.value.ok) return err(token.value.error);
443
- return parseDateContent(token.value.value, span);
444
- case "Tagged":
445
- return parseTag(lexer);
446
- case "Known":
447
- return ok(anyKnownValue());
448
- case "SingleQuotedPattern":
449
- if (!token.value.ok) return err(token.value.error);
450
- return parseKnownValueContent(token.value.value);
451
- case "SingleQuotedRegex":
452
- if (!token.value.ok) return err(token.value.error);
453
- try {
454
- return ok(
455
- patternLeaf(leafKnownValue(KnownValuePattern.regex(new RegExp(token.value.value)))),
456
- );
457
- } catch {
458
- return err(invalidRegex(span));
459
- }
460
- case "Null":
461
- return ok(nullPattern());
462
-
463
- // These tokens are not valid as primary expressions
464
- // They are handled by other parsers or are structural tokens
465
- case "And":
466
- case "Or":
467
- case "Not":
468
- case "Traverse":
469
- case "RepeatZeroOrMoreLazy":
470
- case "RepeatZeroOrMorePossessive":
471
- case "RepeatOneOrMore":
472
- case "RepeatOneOrMoreLazy":
473
- case "RepeatOneOrMorePossessive":
474
- case "RepeatZeroOrOne":
475
- case "RepeatZeroOrOneLazy":
476
- case "RepeatZeroOrOnePossessive":
477
- case "ParenClose":
478
- case "BracketClose":
479
- case "Comma":
480
- case "Ellipsis":
481
- case "Range":
482
- case "Identifier":
483
- return err(unexpectedToken(token, span));
484
- }
485
- }
486
-
487
- // ============================================================================
488
- // Helper Functions
489
- // ============================================================================
490
-
491
- /**
492
- * Parse a parenthesized group expression.
493
- */
494
- function parseParenGroup(lexer: Lexer): Result<Pattern> {
495
- const inner = parseOr(lexer);
496
- if (!inner.ok) return inner;
497
-
498
- const close = lexer.next();
499
- if (close?.token.type !== "ParenClose") {
500
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
501
- }
502
-
503
- return ok(group(inner.value));
504
- }
505
-
506
- /**
507
- * Parse a capture group: @name pattern
508
- */
509
- function parseCapture(lexer: Lexer, name: string): Result<Pattern> {
510
- const inner = parseGroup(lexer);
511
- if (!inner.ok) return inner;
512
- return ok(capture(name, inner.value));
513
- }
514
-
515
- /**
516
- * Parse a search pattern: search(pattern)
517
- */
518
- function parseSearch(lexer: Lexer): Result<Pattern> {
519
- const open = lexer.next();
520
- if (open?.token.type !== "ParenOpen") {
521
- return err({ type: "ExpectedOpenParen", span: lexer.span() });
522
- }
523
-
524
- const inner = parseOr(lexer);
525
- if (!inner.ok) return inner;
526
-
527
- const close = lexer.next();
528
- if (close?.token.type !== "ParenClose") {
529
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
530
- }
531
-
532
- return ok(search(inner.value));
533
- }
534
-
535
- /**
536
- * Parse number with possible range or comparison.
537
- */
538
- function parseNumberRangeOrComparison(lexer: Lexer, firstValue: number): Result<Pattern> {
539
- const next = lexer.peekToken();
540
- if (next === undefined) {
541
- return ok(number(firstValue));
542
- }
543
-
544
- if (next.token.type === "Ellipsis") {
545
- lexer.next(); // consume ...
546
- const endToken = lexer.next();
547
- if (endToken === undefined) {
548
- return err(unexpectedEndOfInput());
549
- }
550
-
551
- let endValue: number;
552
- if (endToken.token.type === "UnsignedInteger" || endToken.token.type === "Integer") {
553
- if (!endToken.token.value.ok) return err(endToken.token.value.error);
554
- endValue = endToken.token.value.value;
555
- } else if (endToken.token.type === "Float") {
556
- if (!endToken.token.value.ok) return err(endToken.token.value.error);
557
- endValue = endToken.token.value.value;
558
- } else {
559
- return err(unexpectedToken(endToken.token, endToken.span));
560
- }
561
-
562
- return ok(numberRange(firstValue, endValue));
563
- }
564
-
565
- return ok(number(firstValue));
566
- }
567
-
568
- /**
569
- * Parse comparison number: >=n, <=n, >n, <n
570
- */
571
- function parseComparisonNumber(lexer: Lexer, op: string): Result<Pattern> {
572
- const numToken = lexer.next();
573
- if (numToken === undefined) {
574
- return err(unexpectedEndOfInput());
575
- }
576
-
577
- let value: number;
578
- if (numToken.token.type === "UnsignedInteger" || numToken.token.type === "Integer") {
579
- if (!numToken.token.value.ok) return err(numToken.token.value.error);
580
- value = numToken.token.value.value;
581
- } else if (numToken.token.type === "Float") {
582
- if (!numToken.token.value.ok) return err(numToken.token.value.error);
583
- value = numToken.token.value.value;
584
- } else {
585
- return err(unexpectedToken(numToken.token, numToken.span));
586
- }
587
-
588
- switch (op) {
589
- case ">=":
590
- return ok(patternLeaf(leafNumber(NumberPattern.greaterThanOrEqual(value))));
591
- case "<=":
592
- return ok(patternLeaf(leafNumber(NumberPattern.lessThanOrEqual(value))));
593
- case ">":
594
- return ok(numberGreaterThan(value));
595
- case "<":
596
- return ok(numberLessThan(value));
597
- default:
598
- return ok(number(value));
599
- }
600
- }
601
-
602
- /**
603
- * Parse an array pattern.
604
- */
605
- function parseArray(lexer: Lexer): Result<Pattern> {
606
- // Check for empty array or simple patterns
607
- const first = lexer.peekToken();
608
- if (first === undefined) {
609
- return err(unexpectedEndOfInput());
610
- }
611
-
612
- if (first.token.type === "BracketClose") {
613
- lexer.next(); // consume ]
614
- return ok(patternLeaf(leafArray(ArrayPattern.count(0))));
615
- }
616
-
617
- if (first.token.type === "RepeatZeroOrMore") {
618
- lexer.next(); // consume *
619
- const close = lexer.next();
620
- if (close?.token.type !== "BracketClose") {
621
- return err({ type: "ExpectedCloseBracket", span: lexer.span() });
622
- }
623
- return ok(anyArray());
624
- }
625
-
626
- // Parse the inner pattern(s)
627
- const patterns: Pattern[] = [];
628
-
629
- while (true) {
630
- const next = lexer.peekToken();
631
- if (next === undefined) {
632
- return err(unexpectedEndOfInput());
633
- }
634
-
635
- if (next.token.type === "BracketClose") {
636
- lexer.next(); // consume ]
637
- break;
638
- }
639
-
640
- const pattern = parseOr(lexer);
641
- if (!pattern.ok) return pattern;
642
- patterns.push(pattern.value);
643
-
644
- const afterPattern = lexer.peekToken();
645
- if (afterPattern === undefined) {
646
- return err(unexpectedEndOfInput());
647
- }
648
-
649
- if (afterPattern.token.type === "Comma") {
650
- lexer.next(); // consume ,
651
- } else if (afterPattern.token.type !== "BracketClose") {
652
- return err(unexpectedToken(afterPattern.token, afterPattern.span));
653
- }
654
- }
655
-
656
- if (patterns.length === 0) {
657
- return ok(patternLeaf(leafArray(ArrayPattern.count(0))));
658
- }
659
-
660
- return ok(patternLeaf(leafArray(ArrayPattern.withPatterns(patterns))));
661
- }
662
-
663
- /**
664
- * Parse a tag pattern.
665
- */
666
- function parseTag(lexer: Lexer): Result<Pattern> {
667
- const open = lexer.next();
668
- if (open?.token.type !== "ParenOpen") {
669
- return ok(anyTag());
670
- }
671
-
672
- // Parse tag number or pattern
673
- const tagToken = lexer.next();
674
- if (tagToken === undefined) {
675
- return err(unexpectedEndOfInput());
676
- }
677
-
678
- if (tagToken.token.type !== "UnsignedInteger") {
679
- return err(unexpectedToken(tagToken.token, tagToken.span));
680
- }
681
-
682
- if (!tagToken.token.value.ok) return err(tagToken.token.value.error);
683
- // tagToken.token.value.value contains the tag number for future tag-specific matching
684
-
685
- const close = lexer.next();
686
- if (close?.token.type !== "ParenClose") {
687
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
688
- }
689
-
690
- // Create a tagged pattern with the specific tag
691
- // For now, just match the tag number
692
- return ok(anyTag()); // Simplified - full implementation would match specific tag
693
- }
694
-
695
- /**
696
- * Parse date content from date'...' pattern.
697
- */
698
- function parseDateContent(content: string, span: Span): Result<Pattern> {
699
- // Check for regex syntax: /pattern/
700
- if (content.startsWith("/") && content.endsWith("/")) {
701
- const regexStr = content.slice(1, -1);
702
- try {
703
- return ok(dateRegex(new RegExp(regexStr)));
704
- } catch {
705
- return err(invalidRegex(span));
706
- }
707
- }
708
-
709
- // Check for range syntax: date1...date2, date1..., ...date2
710
- const rangeIdx = content.indexOf("...");
711
- if (rangeIdx !== -1) {
712
- const left = content.slice(0, rangeIdx).trim();
713
- const right = content.slice(rangeIdx + 3).trim();
714
-
715
- if (left.length === 0 && right.length > 0) {
716
- // ...date2 → latest
717
- const parsed = Date.parse(right);
718
- if (isNaN(parsed)) return err({ type: "InvalidDateFormat", span });
719
- return ok(dateLatest(CborDate.fromDatetime(new Date(parsed))));
720
- }
721
- if (left.length > 0 && right.length === 0) {
722
- // date1... → earliest
723
- const parsed = Date.parse(left);
724
- if (isNaN(parsed)) return err({ type: "InvalidDateFormat", span });
725
- return ok(dateEarliest(CborDate.fromDatetime(new Date(parsed))));
726
- }
727
- if (left.length > 0 && right.length > 0) {
728
- // date1...date2 → range
729
- const parsedStart = Date.parse(left);
730
- const parsedEnd = Date.parse(right);
731
- if (isNaN(parsedStart) || isNaN(parsedEnd)) return err({ type: "InvalidDateFormat", span });
732
- return ok(
733
- dateRange(
734
- CborDate.fromDatetime(new Date(parsedStart)),
735
- CborDate.fromDatetime(new Date(parsedEnd)),
736
- ),
737
- );
738
- }
739
- return err({ type: "InvalidDateFormat", span });
740
- }
741
-
742
- // Simple exact date
743
- const parsed = Date.parse(content);
744
- if (isNaN(parsed)) {
745
- return err({ type: "InvalidDateFormat", span });
746
- }
747
-
748
- const cborDate = CborDate.fromDatetime(new Date(parsed));
749
- return ok(date(cborDate));
750
- }
751
-
752
- /**
753
- * Parse known value content from '...' pattern.
754
- */
755
- function parseKnownValueContent(content: string): Result<Pattern> {
756
- // Try to parse as number first
757
- const numValue = parseInt(content, 10);
758
- if (!isNaN(numValue)) {
759
- const kv = { value: () => BigInt(numValue) } as unknown as KnownValueType;
760
- return ok(knownValue(kv));
761
- }
762
-
763
- // Try to find by name in known values
764
- // For now, just create a named pattern
765
- return ok(patternLeaf(leafKnownValue(KnownValuePattern.named(content))));
766
- }
767
-
768
- /**
769
- * Parse CBOR pattern.
770
- *
771
- * Matches Rust parse_cbor: tries dcbor-pattern regex first (/keyword/),
772
- * then CBOR diagnostic notation via parseDcborItemPartial, then falls
773
- * back to parseOr for envelope pattern expressions.
774
- */
775
- function parseCbor(lexer: Lexer): Result<Pattern> {
776
- // Check for optional content in parentheses
777
- const next = lexer.peekToken();
778
- if (next?.token.type !== "ParenOpen") {
779
- return ok(anyCbor()); // cbor matches any CBOR value
780
- }
781
-
782
- lexer.next(); // consume (
783
-
784
- // Check for dcbor-pattern regex syntax: cbor(/keyword/)
785
- // Use peek() (character-level, non-destructive) instead of peekToken()
786
- // to avoid the lexer advancing past the CBOR content.
787
- if (lexer.peek() === "/") {
788
- const regexTokenResult = lexer.next(); // tokenize /pattern/
789
- if (regexTokenResult?.token.type === "Regex") {
790
- const regexToken = regexTokenResult.token;
791
- if (!regexToken.value.ok) return err(regexToken.value.error);
792
- const keyword = regexToken.value.value;
793
-
794
- // Parse the keyword as a dcbor-pattern expression
795
- const dcborResult = parseDcborPattern(keyword);
796
- if (!dcborResult.ok) {
797
- return err(unexpectedToken(regexToken, regexTokenResult.span));
798
- }
799
-
800
- const close = lexer.next();
801
- if (close?.token.type !== "ParenClose") {
802
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
803
- }
804
-
805
- return ok(cborPattern(dcborResult.value));
806
- }
807
- }
808
-
809
- // Try to parse inner content as CBOR diagnostic notation
810
- // (matching Rust utils::parse_cbor_inner which calls parse_dcbor_item_partial)
811
- const remaining = lexer.remainder();
812
- const cborResult = parseDcborItemPartial(remaining);
813
- if (cborResult.ok) {
814
- const [cborData, consumed] = cborResult.value;
815
- lexer.bump(consumed);
816
- // Skip whitespace before closing paren
817
- while (lexer.peek() === " " || lexer.peek() === "\t" || lexer.peek() === "\n") {
818
- lexer.bump(1);
819
- }
820
- const close = lexer.next();
821
- if (close?.token.type !== "ParenClose") {
822
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
823
- }
824
- return ok(cborValue(cborData));
825
- }
826
-
827
- // Fallback: try parsing as a regular pattern expression
828
- const inner = parseOr(lexer);
829
- if (!inner.ok) return inner;
830
-
831
- const close = lexer.next();
832
- if (close?.token.type !== "ParenClose") {
833
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
834
- }
835
-
836
- return inner;
837
- }
838
-
839
- // ============================================================================
840
- // Structure Pattern Parsers
841
- // ============================================================================
842
-
843
- function parseNode(lexer: Lexer): Result<Pattern> {
844
- const next = lexer.peekToken();
845
- if (next?.token.type !== "ParenOpen") {
846
- return ok(anyNode());
847
- }
848
-
849
- lexer.next(); // consume (
850
-
851
- // Check for assertion count range: node({n,m}), node({n}), node({n,})
852
- const afterParen = lexer.peekToken();
853
- if (afterParen?.token.type === "Range") {
854
- lexer.next(); // consume Range token
855
- const rangeToken = afterParen.token;
856
- if (!rangeToken.value.ok) return err(rangeToken.value.error);
857
- const quantifier = rangeToken.value.value;
858
- const interval = quantifier.interval();
859
-
860
- const close = lexer.next();
861
- if (close?.token.type !== "ParenClose") {
862
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
863
- }
864
-
865
- return ok(patternStructure(structureNode(NodePattern.fromInterval(interval))));
866
- }
867
-
868
- const inner = parseOr(lexer);
869
- if (!inner.ok) return inner;
870
-
871
- const close = lexer.next();
872
- if (close?.token.type !== "ParenClose") {
873
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
874
- }
875
-
876
- return ok(patternStructure(structureNode(NodePattern.withSubject(inner.value))));
877
- }
878
-
879
- function parseAssertion(lexer: Lexer): Result<Pattern> {
880
- const next = lexer.peekToken();
881
- if (next?.token.type !== "ParenOpen") {
882
- return ok(anyAssertion());
883
- }
884
-
885
- lexer.next(); // consume (
886
-
887
- // Parse predicate pattern
888
- const pred = parseOr(lexer);
889
- if (!pred.ok) return pred;
890
-
891
- const comma = lexer.next();
892
- if (comma?.token.type !== "Comma") {
893
- return err(unexpectedToken(comma?.token ?? { type: "Null" }, comma?.span ?? lexer.span()));
894
- }
895
-
896
- // Parse object pattern
897
- const obj = parseOr(lexer);
898
- if (!obj.ok) return obj;
899
-
900
- const close = lexer.next();
901
- if (close?.token.type !== "ParenClose") {
902
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
903
- }
904
-
905
- return ok(
906
- patternStructure(structureAssertions(AssertionsPattern.withBoth(pred.value, obj.value))),
907
- );
908
- }
909
-
910
- function parseAssertionPred(lexer: Lexer): Result<Pattern> {
911
- const next = lexer.peekToken();
912
- if (next?.token.type !== "ParenOpen") {
913
- return ok(anyAssertion());
914
- }
915
-
916
- lexer.next(); // consume (
917
- const inner = parseOr(lexer);
918
- if (!inner.ok) return inner;
919
-
920
- const close = lexer.next();
921
- if (close?.token.type !== "ParenClose") {
922
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
923
- }
924
-
925
- return ok(assertionWithPredicate(inner.value));
926
- }
927
-
928
- function parseAssertionObj(lexer: Lexer): Result<Pattern> {
929
- const next = lexer.peekToken();
930
- if (next?.token.type !== "ParenOpen") {
931
- return ok(anyAssertion());
932
- }
933
-
934
- lexer.next(); // consume (
935
- const inner = parseOr(lexer);
936
- if (!inner.ok) return inner;
937
-
938
- const close = lexer.next();
939
- if (close?.token.type !== "ParenClose") {
940
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
941
- }
942
-
943
- return ok(assertionWithObject(inner.value));
944
- }
945
-
946
- function parseDigest(lexer: Lexer): Result<Pattern> {
947
- const next = lexer.peekToken();
948
- if (next?.token.type !== "ParenOpen") {
949
- return ok(patternStructure(structureDigest(DigestPattern.any())));
950
- }
951
-
952
- lexer.next(); // consume (
953
-
954
- // Parse digest hex pattern
955
- const digestToken = lexer.next();
956
- if (digestToken === undefined) {
957
- return err(unexpectedEndOfInput());
958
- }
959
-
960
- if (digestToken.token.type === "HexPattern") {
961
- if (!digestToken.token.value.ok) return err(digestToken.token.value.error);
962
- const close = lexer.next();
963
- if (close?.token.type !== "ParenClose") {
964
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
965
- }
966
- return ok(digestPrefix(digestToken.token.value.value));
967
- }
968
-
969
- // Accept raw hex string identifiers: digest(a1b2c3)
970
- if (digestToken.token.type === "Identifier") {
971
- const hexStr = digestToken.token.value;
972
- // Validate hex string: must be even length and all hex digits
973
- if (hexStr.length === 0 || hexStr.length % 2 !== 0 || !/^[0-9a-fA-F]+$/.test(hexStr)) {
974
- return err({ type: "InvalidHexString", span: digestToken.span });
975
- }
976
- const bytes = new Uint8Array(hexStr.length / 2);
977
- for (let i = 0; i < hexStr.length; i += 2) {
978
- bytes[i / 2] = Number.parseInt(hexStr.slice(i, i + 2), 16);
979
- }
980
- const close = lexer.next();
981
- if (close?.token.type !== "ParenClose") {
982
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
983
- }
984
- return ok(digestPrefix(bytes));
985
- }
986
-
987
- return err(unexpectedToken(digestToken.token, digestToken.span));
988
- }
989
-
990
- function parseObject(lexer: Lexer): Result<Pattern> {
991
- const next = lexer.peekToken();
992
- if (next?.token.type !== "ParenOpen") {
993
- return ok(anyObject());
994
- }
995
-
996
- lexer.next(); // consume (
997
- const inner = parseOr(lexer);
998
- if (!inner.ok) return inner;
999
-
1000
- const close = lexer.next();
1001
- if (close?.token.type !== "ParenClose") {
1002
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
1003
- }
1004
-
1005
- return ok(object(inner.value));
1006
- }
1007
-
1008
- function parsePredicate(lexer: Lexer): Result<Pattern> {
1009
- const next = lexer.peekToken();
1010
- if (next?.token.type !== "ParenOpen") {
1011
- return ok(anyPredicate());
1012
- }
1013
-
1014
- lexer.next(); // consume (
1015
- const inner = parseOr(lexer);
1016
- if (!inner.ok) return inner;
1017
-
1018
- const close = lexer.next();
1019
- if (close?.token.type !== "ParenClose") {
1020
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
1021
- }
1022
-
1023
- return ok(predicate(inner.value));
1024
- }
1025
-
1026
- function parseSubject(lexer: Lexer): Result<Pattern> {
1027
- const next = lexer.peekToken();
1028
- if (next?.token.type !== "ParenOpen") {
1029
- return ok(anySubject());
1030
- }
1031
-
1032
- lexer.next(); // consume (
1033
- const inner = parseOr(lexer);
1034
- if (!inner.ok) return inner;
1035
-
1036
- const close = lexer.next();
1037
- if (close?.token.type !== "ParenClose") {
1038
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
1039
- }
1040
-
1041
- return ok(subject(inner.value));
1042
- }
1043
-
1044
- function parseUnwrap(lexer: Lexer): Result<Pattern> {
1045
- const next = lexer.peekToken();
1046
- if (next?.token.type !== "ParenOpen") {
1047
- return ok(unwrapEnvelope());
1048
- }
1049
-
1050
- lexer.next(); // consume (
1051
- const inner = parseOr(lexer);
1052
- if (!inner.ok) return inner;
1053
-
1054
- const close = lexer.next();
1055
- if (close?.token.type !== "ParenClose") {
1056
- return err({ type: "ExpectedCloseParen", span: lexer.span() });
1057
- }
1058
-
1059
- return ok(unwrapMatching(inner.value));
1060
- }