@bcts/dcbor-pattern 1.0.0-alpha.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +48 -0
- package/README.md +14 -0
- package/dist/index.cjs +6561 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +2732 -0
- package/dist/index.d.cts.map +1 -0
- package/dist/index.d.mts +2732 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.iife.js +6562 -0
- package/dist/index.iife.js.map +1 -0
- package/dist/index.mjs +6244 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +85 -0
- package/src/error.ts +333 -0
- package/src/format.ts +299 -0
- package/src/index.ts +20 -0
- package/src/interval.ts +230 -0
- package/src/parse/index.ts +95 -0
- package/src/parse/meta/and-parser.ts +47 -0
- package/src/parse/meta/capture-parser.ts +56 -0
- package/src/parse/meta/index.ts +13 -0
- package/src/parse/meta/not-parser.ts +28 -0
- package/src/parse/meta/or-parser.ts +47 -0
- package/src/parse/meta/primary-parser.ts +420 -0
- package/src/parse/meta/repeat-parser.ts +133 -0
- package/src/parse/meta/search-parser.ts +56 -0
- package/src/parse/parse-registry.ts +31 -0
- package/src/parse/structure/array-parser.ts +210 -0
- package/src/parse/structure/index.ts +9 -0
- package/src/parse/structure/map-parser.ts +128 -0
- package/src/parse/structure/tagged-parser.ts +269 -0
- package/src/parse/token.ts +997 -0
- package/src/parse/value/bool-parser.ts +33 -0
- package/src/parse/value/bytestring-parser.ts +42 -0
- package/src/parse/value/date-parser.ts +24 -0
- package/src/parse/value/digest-parser.ts +24 -0
- package/src/parse/value/index.ts +14 -0
- package/src/parse/value/known-value-parser.ts +24 -0
- package/src/parse/value/null-parser.ts +19 -0
- package/src/parse/value/number-parser.ts +19 -0
- package/src/parse/value/text-parser.ts +43 -0
- package/src/pattern/index.ts +740 -0
- package/src/pattern/match-registry.ts +137 -0
- package/src/pattern/matcher.ts +388 -0
- package/src/pattern/meta/and-pattern.ts +56 -0
- package/src/pattern/meta/any-pattern.ts +43 -0
- package/src/pattern/meta/capture-pattern.ts +57 -0
- package/src/pattern/meta/index.ts +168 -0
- package/src/pattern/meta/not-pattern.ts +70 -0
- package/src/pattern/meta/or-pattern.ts +56 -0
- package/src/pattern/meta/repeat-pattern.ts +117 -0
- package/src/pattern/meta/search-pattern.ts +298 -0
- package/src/pattern/meta/sequence-pattern.ts +72 -0
- package/src/pattern/structure/array-pattern/assigner.ts +95 -0
- package/src/pattern/structure/array-pattern/backtrack.ts +240 -0
- package/src/pattern/structure/array-pattern/helpers.ts +140 -0
- package/src/pattern/structure/array-pattern/index.ts +502 -0
- package/src/pattern/structure/index.ts +122 -0
- package/src/pattern/structure/map-pattern.ts +255 -0
- package/src/pattern/structure/tagged-pattern.ts +190 -0
- package/src/pattern/value/bool-pattern.ts +67 -0
- package/src/pattern/value/bytes-utils.ts +48 -0
- package/src/pattern/value/bytestring-pattern.ts +111 -0
- package/src/pattern/value/date-pattern.ts +162 -0
- package/src/pattern/value/digest-pattern.ts +136 -0
- package/src/pattern/value/index.ts +168 -0
- package/src/pattern/value/known-value-pattern.ts +123 -0
- package/src/pattern/value/null-pattern.ts +46 -0
- package/src/pattern/value/number-pattern.ts +181 -0
- package/src/pattern/value/text-pattern.ts +82 -0
- package/src/pattern/vm.ts +619 -0
- package/src/quantifier.ts +185 -0
- package/src/reluctance.ts +65 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search pattern parser.
|
|
3
|
+
*
|
|
4
|
+
* @module parse/meta/search-parser
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { Lexer } from "../token";
|
|
8
|
+
import type { Pattern } from "../../pattern";
|
|
9
|
+
import type { Result } from "../../error";
|
|
10
|
+
import { Ok, Err } from "../../error";
|
|
11
|
+
import { search } from "../../pattern";
|
|
12
|
+
import { parseOr } from "./or-parser";
|
|
13
|
+
|
|
14
|
+
/**
 * Parses the parenthesised body of a search pattern: `(pattern)`.
 *
 * The function starts by reading a `ParenOpen` token, so the lexer must be
 * positioned at the opening parenthesis (presumably the caller has already
 * consumed the search introducer — confirm against the call site).
 *
 * @param lexer - Token source to read from.
 * @returns The inner pattern wrapped with `search(...)`, or the first error
 *   encountered (lexer error, unexpected token, inner parse failure, or a
 *   missing closing parenthesis).
 */
export const parseSearch = (lexer: Lexer): Result<Pattern> => {
  // "(" must come first.
  const opening = lexer.next();
  if (opening === undefined) {
    return Err({ type: "UnexpectedEndOfInput" });
  }
  if (!opening.ok) {
    return opening;
  }
  const { token: openToken, span: openSpan } = opening.value;
  if (openToken.type !== "ParenOpen") {
    return Err({ type: "UnexpectedToken", token: openToken, span: openSpan });
  }

  // The body is a full OR-level expression.
  const body = parseOr(lexer);
  if (!body.ok) {
    return body;
  }

  // ")" must follow the body.
  const closing = lexer.next();
  if (closing === undefined) {
    return Err({ type: "ExpectedCloseParen", span: lexer.span() });
  }
  if (!closing.ok) {
    return closing;
  }
  const { token: closeToken, span: closeSpan } = closing.value;
  if (closeToken.type !== "ParenClose") {
    return Err({ type: "ExpectedCloseParen", span: closeSpan });
  }

  return Ok(search(body.value));
};
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse registry for resolving circular dependencies between parsers.
|
|
3
|
+
*
|
|
4
|
+
* @module parse/parse-registry
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { Lexer } from "./token";
|
|
8
|
+
import type { Pattern } from "../pattern";
|
|
9
|
+
import type { Result } from "../error";
|
|
10
|
+
|
|
11
|
+
/**
 * Shape of the parser stored in the registry: takes a lexer and yields a
 * pattern result.
 */
type OrParser = (lexer: Lexer) => Result<Pattern>;

/**
 * Currently registered parseOr implementation; `undefined` until
 * `setParseOrFn` has been called.
 */
export let parseOrFn: OrParser | undefined = undefined;

/**
 * Stores `fn` as the parseOr implementation used by `parseOrFromRegistry`.
 * A later call replaces the previously stored function.
 */
export const setParseOrFn = (fn: (lexer: Lexer) => Result<Pattern>): void => {
  parseOrFn = fn;
};

/**
 * Forwards `lexer` to the registered parseOr implementation and returns its
 * result.
 *
 * @throws Error when no implementation has been registered yet.
 */
export const parseOrFromRegistry = (lexer: Lexer): Result<Pattern> => {
  const registered = parseOrFn;
  if (registered !== undefined) {
    return registered(lexer);
  }
  throw new Error("ParseOr function not initialized. Import parse/index to initialize.");
};
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Array pattern parser.
|
|
3
|
+
*
|
|
4
|
+
* @module parse/structure/array-parser
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { Lexer } from "../token";
|
|
8
|
+
import type { Pattern } from "../../pattern";
|
|
9
|
+
import type { Result } from "../../error";
|
|
10
|
+
import { Ok, Err } from "../../error";
|
|
11
|
+
import { sequence, or, and, not } from "../../pattern";
|
|
12
|
+
import {
|
|
13
|
+
arrayPatternWithLengthInterval,
|
|
14
|
+
arrayPatternWithElements,
|
|
15
|
+
} from "../../pattern/structure/array-pattern";
|
|
16
|
+
import { Interval } from "../../interval";
|
|
17
|
+
import { parseOrFromRegistry } from "../parse-registry";
|
|
18
|
+
|
|
19
|
+
/**
 * Reads the next token and checks that it is `BracketClose`.
 *
 * @returns `undefined` when the closing bracket was consumed, otherwise the
 *   error result to hand back to the caller.
 */
const expectBracketClose = (lexer: Lexer): Result<Pattern> | undefined => {
  const closing = lexer.next();
  if (closing === undefined) {
    return Err({ type: "ExpectedCloseBracket", span: lexer.span() });
  }
  if (!closing.ok) {
    return closing;
  }
  if (closing.value.token.type !== "BracketClose") {
    return Err({ type: "ExpectedCloseBracket", span: closing.value.span });
  }
  return undefined;
};

/**
 * Parses the inside of a bracketed array pattern; the opening bracket has
 * already been consumed by the caller.
 *
 * Three forms are recognised, chosen by the first token after the bracket:
 * - `BracketClose` (`[]`): an array whose length is exactly 0.
 * - `Range` (e.g. `[{3}]`, `[{1,5}]`): an array-length constraint taken from
 *   the token's quantifier interval, followed by the closing bracket.
 * - anything else: element pattern(s), followed by the closing bracket.
 *
 * @returns A `Structure` pattern of type `Array`, or the first error hit.
 */
export const parseBracketArray = (lexer: Lexer): Result<Pattern> => {
  const lookahead = lexer.peekToken();
  if (lookahead === undefined) {
    return Err({ type: "UnexpectedEndOfInput" });
  }
  if (!lookahead.ok) {
    return lookahead;
  }
  const head = lookahead.value;

  // `[]` — nothing between the brackets.
  if (head.type === "BracketClose") {
    lexer.next();
    return Ok({
      kind: "Structure",
      pattern: { type: "Array", pattern: arrayPatternWithLengthInterval(Interval.exactly(0)) },
    });
  }

  // `[{n}]` / `[{n,m}]` — length constraint only.
  if (head.type === "Range") {
    lexer.next();
    const lengthPattern = arrayPatternWithLengthInterval(head.quantifier.interval());
    const missingClose = expectBracketClose(lexer);
    if (missingClose !== undefined) {
      return missingClose;
    }
    return Ok({
      kind: "Structure",
      pattern: { type: "Array", pattern: lengthPattern },
    });
  }

  // Otherwise the brackets enclose element pattern(s).
  const elements = parseArrayOr(lexer);
  if (!elements.ok) {
    return elements;
  }
  const elementsPattern = arrayPatternWithElements(elements.value);

  const missingClose = expectBracketClose(lexer);
  if (missingClose !== undefined) {
    return missingClose;
  }
  return Ok({
    kind: "Structure",
    pattern: { type: "Array", pattern: elementsPattern },
  });
};
|
|
99
|
+
|
|
100
|
+
/**
 * Parses one or more AND-level operands separated by `Or` tokens inside an
 * array.
 *
 * A single operand is returned as-is; two or more are combined with `or(...)`.
 * Scanning for further operands stops at the first token that is not `Or`
 * (including end of input or a lexer error, which is left unconsumed).
 */
const parseArrayOr = (lexer: Lexer): Result<Pattern> => {
  const head = parseArrayAnd(lexer);
  if (!head.ok) {
    return head;
  }
  const alternatives: Pattern[] = [head.value];

  for (;;) {
    const lookahead = lexer.peekToken();
    const sawOr = lookahead !== undefined && lookahead.ok && lookahead.value.type === "Or";
    if (!sawOr) {
      break;
    }
    lexer.next(); // drop the OR token

    const operand = parseArrayAnd(lexer);
    if (!operand.ok) {
      return operand;
    }
    alternatives.push(operand.value);
  }

  if (alternatives.length > 1) {
    return Ok(or(...alternatives));
  }
  return Ok(head.value);
};
|
|
131
|
+
|
|
132
|
+
/**
 * Parses one or more NOT-level operands separated by `And` tokens inside an
 * array.
 *
 * A single operand is returned as-is; two or more are combined with
 * `and(...)`. Scanning for further operands stops at the first token that is
 * not `And` (including end of input or a lexer error, which is left
 * unconsumed).
 */
const parseArrayAnd = (lexer: Lexer): Result<Pattern> => {
  const head = parseArrayNot(lexer);
  if (!head.ok) {
    return head;
  }
  const conjuncts: Pattern[] = [head.value];

  for (;;) {
    const lookahead = lexer.peekToken();
    const sawAnd = lookahead !== undefined && lookahead.ok && lookahead.value.type === "And";
    if (!sawAnd) {
      break;
    }
    lexer.next(); // drop the AND token

    const operand = parseArrayNot(lexer);
    if (!operand.ok) {
      return operand;
    }
    conjuncts.push(operand.value);
  }

  if (conjuncts.length > 1) {
    return Ok(and(...conjuncts));
  }
  return Ok(head.value);
};
|
|
163
|
+
|
|
164
|
+
/**
 * Parses an optionally negated sequence inside an array.
 *
 * Any number of leading `Not` tokens is consumed, then a sequence is parsed
 * and wrapped in `not(...)` once per consumed token (innermost first, so the
 * operator is right associative). With no `Not` token the sequence result is
 * returned unchanged; a failing sequence is returned without wrapping.
 */
const parseArrayNot = (lexer: Lexer): Result<Pattern> => {
  // Count and consume the run of NOT tokens.
  let negations = 0;
  for (;;) {
    const lookahead = lexer.peekToken();
    if (lookahead === undefined || !lookahead.ok || lookahead.value.type !== "Not") {
      break;
    }
    lexer.next();
    negations += 1;
  }

  const operand = parseArraySequence(lexer);
  if (!operand.ok || negations === 0) {
    return operand;
  }

  let wrapped: Pattern = operand.value;
  for (let applied = 0; applied < negations; applied += 1) {
    wrapped = not(wrapped);
  }
  return Ok(wrapped);
};
|
|
179
|
+
|
|
180
|
+
/**
 * Parses one or more comma-separated element patterns inside an array.
 *
 * Each element is parsed through the registered parseOr function. A single
 * element is returned as-is; two or more are combined with `sequence(...)`.
 * Scanning stops at the first token that is not `Comma` (including end of
 * input or a lexer error, which is left unconsumed).
 */
const parseArraySequence = (lexer: Lexer): Result<Pattern> => {
  const head = parseOrFromRegistry(lexer);
  if (!head.ok) {
    return head;
  }
  const elements: Pattern[] = [head.value];

  for (;;) {
    const lookahead = lexer.peekToken();
    const sawComma = lookahead !== undefined && lookahead.ok && lookahead.value.type === "Comma";
    if (!sawComma) {
      break;
    }
    lexer.next(); // drop the comma

    const element = parseOrFromRegistry(lexer);
    if (!element.ok) {
      return element;
    }
    elements.push(element.value);
  }

  if (elements.length > 1) {
    return Ok(sequence(...elements));
  }
  return Ok(head.value);
};
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Map pattern parser.
|
|
3
|
+
*
|
|
4
|
+
* @module parse/structure/map-parser
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { Lexer } from "../token";
|
|
8
|
+
import type { Pattern } from "../../pattern";
|
|
9
|
+
import type { Result } from "../../error";
|
|
10
|
+
import { Ok, Err } from "../../error";
|
|
11
|
+
import { anyMap } from "../../pattern";
|
|
12
|
+
import {
|
|
13
|
+
mapPatternWithLengthInterval,
|
|
14
|
+
mapPatternWithConstraints,
|
|
15
|
+
} from "../../pattern/structure/map-pattern";
|
|
16
|
+
import { parseOrFromRegistry } from "../parse-registry";
|
|
17
|
+
|
|
18
|
+
/**
 * Parses the inside of a braced map pattern; the opening brace has already
 * been consumed by the caller.
 *
 * Three forms are recognised, chosen by the first token after the brace:
 * - `BraceClose` (`{}`): matches any map.
 * - `Range` (e.g. `{{n}}`): a map-length constraint taken from the token's
 *   quantifier interval, followed by the closing brace.
 * - anything else: one or more `key: value` constraints separated by commas
 *   and terminated by the closing brace. Keys and values are each parsed
 *   through the registered parseOr function.
 *
 * @param lexer - Token source positioned just after the opening brace.
 * @returns A map pattern, or the first error encountered. Lexer errors are
 *   propagated unchanged; `UnexpectedEndOfInput` is reported only when the
 *   input really ends.
 */
export const parseBracketMap = (lexer: Lexer): Result<Pattern> => {
  // Opening brace was already consumed
  const peeked = lexer.peekToken();

  if (peeked === undefined) {
    return Err({ type: "UnexpectedEndOfInput" });
  }

  if (!peeked.ok) {
    return peeked;
  }

  const token = peeked.value;

  // Check for closing brace (empty map - which means "any map")
  if (token.type === "BraceClose") {
    lexer.next(); // consume the closing brace
    return Ok(anyMap());
  }

  // Check for Range token (map length constraint)
  if (token.type === "Range") {
    lexer.next(); // consume the Range token
    const pattern = mapPatternWithLengthInterval(token.quantifier.interval());

    // Expect closing brace
    const closeResult = lexer.next();
    if (closeResult === undefined) {
      return Err({ type: "ExpectedCloseBrace", span: lexer.span() });
    }
    if (!closeResult.ok) {
      return closeResult;
    }
    if (closeResult.value.token.type !== "BraceClose") {
      return Err({
        type: "ExpectedCloseBrace",
        span: closeResult.value.span,
      });
    }

    return Ok({
      kind: "Structure",
      pattern: { type: "Map", pattern },
    });
  }

  // Parse as map with key-value constraints
  const constraints: [Pattern, Pattern][] = [];

  while (true) {
    // Parse key pattern
    const keyResult = parseOrFromRegistry(lexer);
    if (!keyResult.ok) {
      return keyResult;
    }

    // Expect colon
    const colonResult = lexer.next();
    if (colonResult === undefined) {
      return Err({ type: "ExpectedColon", span: lexer.span() });
    }
    if (!colonResult.ok) {
      return colonResult;
    }
    if (colonResult.value.token.type !== "Colon") {
      return Err({
        type: "ExpectedColon",
        span: colonResult.value.span,
      });
    }

    // Parse value pattern
    const valueResult = parseOrFromRegistry(lexer);
    if (!valueResult.ok) {
      return valueResult;
    }

    constraints.push([keyResult.value, valueResult.value]);

    // Check for comma or closing brace
    const nextToken = lexer.peekToken();
    if (nextToken === undefined) {
      return Err({ type: "UnexpectedEndOfInput" });
    }
    if (!nextToken.ok) {
      // A lexer failure is not "end of input": hand the real error back,
      // exactly as is done for the first peek of this function.
      return nextToken;
    }

    if (nextToken.value.type === "BraceClose") {
      lexer.next(); // consume the closing brace
      break;
    }

    if (nextToken.value.type === "Comma") {
      lexer.next(); // consume the comma
      continue;
    }

    return Err({
      type: "UnexpectedToken",
      token: nextToken.value,
      span: lexer.span(),
    });
  }

  // Create map pattern with constraints
  return Ok({
    kind: "Structure",
    pattern: { type: "Map", pattern: mapPatternWithConstraints(constraints) },
  });
};
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tagged pattern parser.
|
|
3
|
+
*
|
|
4
|
+
* Supports the following syntax:
|
|
5
|
+
* - `tagged` - matches any tagged value
|
|
6
|
+
* - `tagged(value, pattern)` - matches tagged value with specific u64 tag and content pattern
|
|
7
|
+
* - `tagged(name, pattern)` - matches tagged value with named tag and content pattern
|
|
8
|
+
* - `tagged(/regex/, pattern)` - matches tagged value with tag name matching regex and content pattern
|
|
9
|
+
*
|
|
10
|
+
* @module parse/structure/tagged-parser
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type { Lexer } from "../token";
|
|
14
|
+
import type { Pattern } from "../../pattern";
|
|
15
|
+
import type { Result } from "../../error";
|
|
16
|
+
import { Ok, Err } from "../../error";
|
|
17
|
+
import { anyTagged } from "../../pattern";
|
|
18
|
+
import { createTag } from "@bcts/dcbor";
|
|
19
|
+
import {
|
|
20
|
+
taggedPatternWithTag,
|
|
21
|
+
taggedPatternWithName,
|
|
22
|
+
taggedPatternWithRegex,
|
|
23
|
+
} from "../../pattern/structure/tagged-pattern";
|
|
24
|
+
import { parse } from "../index";
|
|
25
|
+
|
|
26
|
+
/** Selects a tag by its numeric value. */
type TagValueSelector = { type: "Value"; value: number };

/** Selects a tag by its exact name. */
type TagNameSelector = { type: "Name"; name: string };

/** Selects a tag whose name matches a regular expression. */
type TagRegexSelector = { type: "Regex"; regex: RegExp };

/**
 * First argument of `tagged(selector, pattern)`, discriminated on `type`.
 */
type TagSelector = TagValueSelector | TagNameSelector | TagRegexSelector;
|
|
33
|
+
|
|
34
|
+
/**
 * Parses what follows the `tagged` keyword.
 *
 * - Without a following `(`, the result matches any tagged value.
 * - With `(selector, pattern)`, the text after the parenthesis is handed to
 *   `parseTaggedInner`, the lexer is advanced by the amount that call reports
 *   as consumed, and a closing parenthesis is then required.
 *
 * The selector decides which tagged pattern is built: a numeric value becomes
 * a tag created from `BigInt(value)`, a name is matched by name, and a regex
 * is matched against the tag name.
 *
 * @returns A `Structure` pattern of type `Tagged`, the "any tagged" pattern,
 *   or the first error encountered.
 */
export const parseTagged = (lexer: Lexer): Result<Pattern> => {
  const lookahead = lexer.peekToken();
  const hasArguments =
    lookahead !== undefined && lookahead.ok && lookahead.value.type === "ParenOpen";
  if (!hasArguments) {
    // Bare `tagged`.
    return Ok(anyTagged());
  }

  lexer.next(); // "("

  // The argument list is parsed directly from the remaining source text.
  const parsed = parseTaggedInner(lexer.remainder(), lexer.position());
  if (!parsed.ok) {
    return parsed;
  }
  const [selector, content, consumed] = parsed.value;

  // Move the lexer past the text that was parsed by hand.
  lexer.bump(consumed);

  const closing = lexer.next();
  if (closing === undefined) {
    return Err({ type: "ExpectedCloseParen", span: lexer.span() });
  }
  if (!closing.ok) {
    return closing;
  }
  if (closing.value.token.type !== "ParenClose") {
    return Err({ type: "ExpectedCloseParen", span: closing.value.span });
  }

  const taggedPattern =
    selector.type === "Value"
      ? taggedPatternWithTag(createTag(BigInt(selector.value)), content)
      : selector.type === "Name"
        ? taggedPatternWithName(selector.name, content)
        : taggedPatternWithRegex(selector.regex, content);

  return Ok({
    kind: "Structure",
    pattern: { type: "Tagged", pattern: taggedPattern },
  });
};
|
|
106
|
+
|
|
107
|
+
/**
 * Finds where the content pattern of `tagged(selector, pattern)` ends.
 *
 * Scans `src` from `start` and returns the index of the first `)` that is not
 * nested inside another pair of parentheses and not inside a literal, or
 * `src.length` when there is none. Parentheses inside the following literal
 * forms are ignored:
 * - `"..."` and `/.../`, where a backslash escapes the next character;
 * - `'...'`, which ends at the next `'`, except that a leading `'/` first
 *   skips a complete `/.../` regex so quotes inside that regex do not end it.
 *
 * NOTE(review): the literal forms are inferred from the pattern syntax, not
 * from the lexer itself — confirm they agree with the tokenizer.
 */
const findTaggedContentEnd = (src: string, start: number): number => {
  // Returns the index just past a `"`- or `/`-delimited literal opening at `open`.
  const skipEscaped = (open: number): number => {
    const closer = src[open];
    let i = open + 1;
    while (i < src.length) {
      const ch = src[i];
      if (ch === "\\") {
        i += 2; // skip the escaped character as well
        continue;
      }
      i += 1;
      if (ch === closer) {
        break;
      }
    }
    return i;
  };

  // Returns the index just past a `'`-delimited literal opening at `open`.
  const skipSingleQuoted = (open: number): number => {
    let i = src[open + 1] === "/" ? skipEscaped(open + 1) : open + 1;
    while (i < src.length && src[i] !== "'") {
      i += 1;
    }
    return i + 1;
  };

  let depth = 0;
  let pos = start;
  while (pos < src.length) {
    const ch = src[pos];
    if (ch === '"' || ch === "/") {
      pos = skipEscaped(pos);
      continue;
    }
    if (ch === "'") {
      pos = skipSingleQuoted(pos);
      continue;
    }
    if (ch === "(") {
      depth += 1;
    } else if (ch === ")") {
      if (depth === 0) {
        return pos; // closing paren of the enclosing tagged(...)
      }
      depth -= 1;
    }
    pos += 1;
  }
  // Unterminated literal or missing ")": everything up to the end is content.
  return src.length;
};

/**
 * Parses the inner content of `tagged(selector, pattern)`.
 *
 * @param src - Source text that follows the opening parenthesis.
 * @param remainderStart - Offset of `src` within the original input; added to
 *   error spans so they refer to the original input.
 * @returns `[selector, contentPattern, consumed]`, where `consumed` is the
 *   index in `src` at which parsing stopped (the closing parenthesis, or the
 *   end of `src` if there is none), or the first error encountered.
 */
const parseTaggedInner = (
  src: string,
  remainderStart: number,
): Result<[TagSelector, Pattern, number]> => {
  let pos = 0;
  skipWhitespace(src, (p) => (pos = p), pos);

  // Parse the tag selector (first parameter)
  let tagSelector: TagSelector;

  if (src[pos] === "/") {
    // Regex pattern
    const regexResult = parseTextRegex(src, pos);
    if (!regexResult.ok) {
      // Regex spans are relative to `src`; shift them to the original input,
      // as is done for content-pattern errors below.
      const error = regexResult.error;
      if ("span" in error) {
        const adjustedSpan = {
          start: error.span.start + remainderStart,
          end: error.span.end + remainderStart,
        };
        return Err({ ...error, span: adjustedSpan } as typeof error);
      }
      return regexResult;
    }
    const [regex, newPos] = regexResult.value;
    pos = newPos;
    tagSelector = { type: "Regex", regex };
  } else {
    // Could be a number or a name
    const wordResult = parseBareWord(src, pos);
    if (!wordResult.ok) {
      return wordResult;
    }
    const [word, newPos] = wordResult.value;
    pos = newPos;

    // Only a canonical base-10 integer counts as a tag value; anything else
    // is a tag name.
    // NOTE(review): integers beyond Number.MAX_SAFE_INTEGER do not round-trip
    // through parseInt and therefore fall through to the name branch.
    const numValue = parseInt(word, 10);
    if (!isNaN(numValue) && numValue.toString() === word) {
      tagSelector = { type: "Value", value: numValue };
    } else {
      tagSelector = { type: "Name", name: word };
    }
  }

  // Expect comma
  skipWhitespace(src, (p) => (pos = p), pos);
  if (pos >= src.length || src[pos] !== ",") {
    return Err({ type: "UnexpectedEndOfInput" });
  }
  pos += 1;
  skipWhitespace(src, (p) => (pos = p), pos);

  // Parse the content pattern (second parameter): locate its end, ignoring
  // nested parentheses and parentheses inside literals.
  const patternStart = pos;
  pos = findTaggedContentEnd(src, patternStart);

  const patternSrc = src.slice(patternStart, pos);
  const trimmedPattern = patternSrc.trim();
  const trimOffset = patternSrc.length - patternSrc.trimStart().length;

  const contentResult = parse(trimmedPattern);
  if (!contentResult.ok) {
    // Adjust error spans to be relative to the original input
    const error = contentResult.error;
    if ("span" in error) {
      const offset = remainderStart + patternStart + trimOffset;
      const adjustedSpan = {
        start: error.span.start + offset,
        end: error.span.end + offset,
      };
      // Create a new error with the adjusted span
      return Err({ ...error, span: adjustedSpan } as typeof error);
    }
    return contentResult;
  }

  return Ok([tagSelector, contentResult.value, pos]);
};
|
|
194
|
+
|
|
195
|
+
/**
 * Parses a `/.../` regular expression out of `src`.
 *
 * Leading whitespace at `startPos` is skipped and a `/` must follow. The body
 * runs to the next `/` that is not preceded by a backslash escape and is
 * compiled with `new RegExp` (no flags). Whitespace after the closing slash
 * is skipped as well.
 *
 * @returns `[regex, position after the regex and trailing whitespace]`, or
 *   `UnterminatedRegex` when the opening or closing slash is missing, or
 *   `InvalidRegex` when the body does not compile. Spans are indices into
 *   `src`.
 */
const parseTextRegex = (src: string, startPos: number): Result<[RegExp, number]> => {
  let cursor = startPos;
  const moveTo = (p: number): void => {
    cursor = p;
  };
  skipWhitespace(src, moveTo, cursor);

  const atSlash = cursor < src.length && src[cursor] === "/";
  if (!atSlash) {
    return Err({ type: "UnterminatedRegex", span: { start: cursor, end: cursor } });
  }

  const bodyStart = cursor + 1;
  let escaped = false;

  for (cursor = bodyStart; cursor < src.length; ) {
    const ch = src[cursor];
    cursor += 1;

    if (escaped) {
      // Character after a backslash is taken literally.
      escaped = false;
    } else if (ch === "\\") {
      escaped = true;
    } else if (ch === "/") {
      // `cursor` is already past the closing slash.
      try {
        const regex = new RegExp(src.slice(bodyStart, cursor - 1));
        skipWhitespace(src, moveTo, cursor);
        return Ok([regex, cursor]);
      } catch {
        return Err({ type: "InvalidRegex", span: { start: bodyStart, end: cursor } });
      }
    }
  }

  return Err({ type: "UnterminatedRegex", span: { start: cursor, end: cursor } });
};
|
|
234
|
+
|
|
235
|
+
/**
 * Parses a bare word out of `src`.
 *
 * Leading whitespace at `startPos` is skipped; the word is then every
 * character up to (not including) the next whitespace character, comma or
 * closing parenthesis, or the end of `src`. Whitespace after the word is
 * skipped as well.
 *
 * @returns `[word, position after the word and trailing whitespace]`, or
 *   `UnexpectedEndOfInput` when the word would be empty.
 */
const parseBareWord = (src: string, startPos: number): Result<[string, number]> => {
  let cursor = startPos;
  const moveTo = (p: number): void => {
    cursor = p;
  };
  skipWhitespace(src, moveTo, cursor);

  const wordStart = cursor;
  while (cursor < src.length && !" \t\n\r\f,)".includes(src[cursor])) {
    cursor += 1;
  }

  if (cursor === wordStart) {
    return Err({ type: "UnexpectedEndOfInput" });
  }

  const word = src.slice(wordStart, cursor);
  skipWhitespace(src, moveTo, cursor);

  return Ok([word, cursor]);
};
|
|
260
|
+
|
|
261
|
+
/**
 * Advances past whitespace in `src`.
 *
 * Starting at `pos`, steps over space, tab, line feed, carriage return and
 * form feed characters, then reports the resulting index through `setPos`
 * (called exactly once, even when nothing was skipped).
 */
const skipWhitespace = (src: string, setPos: (p: number) => void, pos: number): void => {
  let cursor = pos;
  for (; cursor < src.length; cursor += 1) {
    const ch = src[cursor];
    const isBlank = ch === " " || ch === "\t" || ch === "\n" || ch === "\r" || ch === "\f";
    if (!isBlank) {
      break;
    }
  }
  setPos(cursor);
};
|