@bcts/dcbor-parse 1.0.0-alpha.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/parse.ts ADDED
@@ -0,0 +1,411 @@
1
+ /**
2
+ * @bcts/dcbor-parse - Parse module
3
+ *
4
+ * This is a 1:1 TypeScript port of bc-dcbor-parse-rust parse.rs
5
+ *
6
+ * @module dcbor-parse/parse
7
+ */
8
+
9
+ import { type Cbor, cbor, CborMap, getGlobalTagsStore } from "@bcts/dcbor";
10
+ import { KnownValue, KNOWN_VALUES } from "@bcts/known-values";
11
+ import type { UR } from "@bcts/uniform-resources";
12
+ import {
13
+ type Span,
14
+ span,
15
+ parseError as PE,
16
+ type ParseResult,
17
+ ok,
18
+ err,
19
+ isDefaultError,
20
+ } from "./error";
21
+ import { type Token, Lexer } from "./token";
22
+
23
/**
 * Parses a complete dCBOR diagnostic-notation string into a single `Cbor` item.
 *
 * The first token is obtained with `expectToken` and handed to
 * `parseItemToken`. Once the item has been parsed, the lexer is polled one more
 * time; if it yields anything at all, the input is rejected as containing
 * extra data.
 *
 * @param src - The dCBOR diagnostic-notation text to parse.
 * @returns On success, the parsed `Cbor` item. On failure, a `ParseError`:
 *   `emptyInput` when the lexer is exhausted before producing a first token,
 *   `extraData` (carrying the lexer's current span) when anything follows the
 *   item, or whatever error token/item parsing reported.
 *
 * @example
 * ```typescript
 * const result = parseDcborItem("[1, 2, 3]");
 * if (result.ok) {
 *   console.log(result.value.toDiagnostic()); // "[1, 2, 3]"
 * }
 * ```
 */
export function parseDcborItem(src: string): ParseResult<Cbor> {
  const tokens = new Lexer(src);

  const head = expectToken(tokens);
  if (!head.ok) {
    if (head.error.type !== "UnexpectedEndOfInput") {
      return head;
    }
    // Running dry before the very first token means there was nothing to parse.
    return err(PE.emptyInput());
  }

  const item = parseItemToken(head.value, tokens);

  // Only a successfully parsed item is checked for trailing input; a failed
  // parse is returned as-is without touching the lexer again.
  if (item.ok && tokens.next() !== undefined) {
    return err(PE.extraData(tokens.span()));
  }

  return item;
}
68
+
69
/**
 * Parses one dCBOR item from the front of a string and reports how much of the
 * input was used.
 *
 * Unlike `parseDcborItem`, anything following the first item is tolerated.
 * After the item is parsed the lexer is polled once: if it yields something,
 * the reported offset is the start of the lexer's span for it; otherwise the
 * offset is `src.length`.
 *
 * NOTE(review): the offset is described upstream as a byte count, but
 * `src.length` counts UTF-16 code units — confirm which unit `Lexer` spans use
 * before relying on it for non-ASCII input.
 *
 * @param src - The dCBOR diagnostic-notation text to parse.
 * @returns On success, a `[Cbor, number]` pair holding the parsed item and the
 *   offset at which parsing stopped. On failure, `emptyInput` when no first
 *   token could be read, or the error reported by token/item parsing.
 *
 * @example
 * ```typescript
 * const result = parseDcborItemPartial("true )");
 * if (result.ok) {
 *   const [cbor, used] = result.value;
 *   console.log(cbor.toDiagnostic()); // "true"
 *   console.log(used); // 5
 * }
 * ```
 */
export function parseDcborItemPartial(src: string): ParseResult<[Cbor, number]> {
  const tokens = new Lexer(src);

  const head = expectToken(tokens);
  if (!head.ok) {
    if (head.error.type !== "UnexpectedEndOfInput") {
      return head;
    }
    return err(PE.emptyInput());
  }

  const item = parseItemToken(head.value, tokens);
  if (!item.ok) {
    return item;
  }

  // Default to "everything was consumed"; if the lexer still has something to
  // offer, parsing stopped where that next span begins.
  let consumed = src.length;
  if (tokens.next() !== undefined) {
    consumed = tokens.span().start;
  }

  return ok([item.value, consumed]);
}
112
+
113
+ // === Private Functions ===
114
+
115
/**
 * Reads the next token from the lexer and parses the item it introduces.
 *
 * @param lexer - The lexer to pull tokens from.
 * @returns The parsed item, or the error from `expectToken` when no usable
 *   token could be read.
 */
function parseItem(lexer: Lexer): ParseResult<Cbor> {
  const next = expectToken(lexer);
  return next.ok ? parseItemToken(next.value, lexer) : next;
}
122
+
123
/**
 * Pulls one token from the lexer, turning "no more tokens" into an error.
 *
 * The lexer's span is sampled *before* advancing; that earlier span is the one
 * attached to an `unrecognizedToken` error.
 *
 * @param lexer - The lexer to advance.
 * @returns The lexer's own result when it produced a token or a non-default
 *   error; `unexpectedEndOfInput` when the lexer is exhausted; and
 *   `unrecognizedToken` when the lexer reported a default error.
 */
function expectToken(lexer: Lexer): ParseResult<Token> {
  const priorSpan = lexer.span();
  const lexed = lexer.next();

  if (lexed === undefined) {
    return err(PE.unexpectedEndOfInput());
  }

  // Tokens and specific lexer errors are forwarded untouched; only the
  // lexer's default error is rewritten below.
  if (lexed.ok || !isDefaultError(lexed.error)) {
    return lexed;
  }

  return err(PE.unrecognizedToken(priorSpan));
}
140
+
141
/**
 * Parses the item introduced by an already-read token.
 *
 * Literal tokens are converted directly. Opening brackets/braces and tag
 * tokens delegate to the dedicated sub-parsers, which consume further tokens
 * from the lexer. Purely syntactic tokens cannot begin an item and produce an
 * `unexpectedToken` error.
 *
 * @param token - The token that starts the item.
 * @param lexer - The lexer positioned just after `token`; its current span is
 *   used for error reporting and it supplies tokens for nested items.
 * @returns The parsed item, or the error describing why `token` could not
 *   start (or complete) an item.
 */
function parseItemToken(token: Token, lexer: Lexer): ParseResult<Cbor> {
  switch (token.type) {
    // --- Containers and tagged items: these pull more tokens from the lexer ---
    case "BracketOpen":
      return parseArray(lexer);

    case "BraceOpen":
      return parseMap(lexer);

    case "TagValue":
      return parseNumberTag(token.value, lexer);

    case "TagName":
      return parseNameTag(token.value, lexer);

    // --- Tokens whose conversion may fail and needs the current span ---
    case "String":
      return parseString(token.value, lexer.span());

    case "UR":
      return parseUr(token.value, lexer.span());

    // --- Known values ---
    case "Unit":
      return ok(new KnownValue(0).taggedCbor());

    case "KnownValueNumber":
      return ok(new KnownValue(token.value).taggedCbor());

    case "KnownValueName": {
      const name = token.value;
      // An empty name is treated as Unit (known value 0); anything else is
      // looked up by name.
      const known = name === "" ? new KnownValue(0) : knownValueForName(name);
      if (known === undefined) {
        // Shrink the span by one character on each side so it covers only the
        // name, not the delimiters around it.
        const { start, end } = lexer.span();
        return err(PE.unknownKnownValueName(name, span(start + 1, end - 1)));
      }
      return ok(known.taggedCbor());
    }

    // --- Literals without a payload ---
    case "Null":
      return ok(cbor(null));

    case "NaN":
      return ok(cbor(Number.NaN));

    case "Infinity":
      return ok(cbor(Number.POSITIVE_INFINITY));

    case "NegInfinity":
      return ok(cbor(Number.NEGATIVE_INFINITY));

    // --- Literals whose payload was already decoded by the lexer ---
    case "Bool":
      return ok(cbor(token.value));

    case "Number":
      return ok(cbor(token.value));

    case "ByteStringHex":
      return ok(cbor(token.value));

    case "ByteStringBase64":
      return ok(cbor(token.value));

    case "DateLiteral":
      return ok(cbor(token.value));

    // --- Punctuation: never valid at the start of an item ---
    case "Comma":
    case "Colon":
    case "ParenthesisOpen":
    case "ParenthesisClose":
    case "BracketClose":
    case "BraceClose":
      return err(PE.unexpectedToken(token, lexer.span()));
  }
}
220
+
221
/**
 * Converts the raw text of a string token into a CBOR text string.
 *
 * The token text must be wrapped in double quotes. The delimiters are removed
 * and the characters between them are used verbatim — no escape processing is
 * performed here.
 *
 * @param s - The raw token text, including its surrounding quotes.
 * @param tokenSpan - The span of the token, attached to the error on failure.
 * @returns The unquoted text as a `Cbor` value, or `unrecognizedToken` when
 *   `s` is not enclosed in a pair of double quotes.
 */
function parseString(s: string, tokenSpan: Span): ParseResult<Cbor> {
  // A valid literal needs two distinct delimiter characters. Without the
  // length check a lone `"` satisfies both startsWith and endsWith and would
  // be silently accepted as an empty string.
  const isQuoted = s.length >= 2 && s.startsWith('"') && s.endsWith('"');
  if (!isQuoted) {
    return err(PE.unrecognizedToken(tokenSpan));
  }

  // Drop the opening and closing quote, keep everything in between.
  return ok(cbor(s.slice(1, -1)));
}
229
+
230
/**
 * Looks up the numeric value of a tag by name in the global tags store.
 *
 * @param name - The tag name to resolve.
 * @returns The tag's value, or `undefined` when the store has no tag with
 *   that name.
 */
function tagForName(name: string): number | bigint | undefined {
  const tag = getGlobalTagsStore().tagForName(name);
  return tag?.value;
}
233
+
234
/**
 * Looks up a known value by name in the `KNOWN_VALUES` registry.
 *
 * @param name - The known-value name to resolve.
 * @returns The matching `KnownValue`, or `undefined` when the registry has no
 *   entry with that name.
 */
function knownValueForName(name: string): KnownValue | undefined {
  const registry = KNOWN_VALUES.get();
  return registry.knownValueNamed(name);
}
237
+
238
/**
 * Converts a UR token into a tagged CBOR value.
 *
 * The UR's type string is resolved to a tag through `tagForName`; the UR's
 * CBOR payload is then wrapped in that tag.
 *
 * @param ur - The UR carried by the token.
 * @param tokenSpan - The span of the whole UR token in the source.
 * @returns The tagged `Cbor` value, or `unknownUrType` when no tag is
 *   registered under the UR's type name. The error span starts 3 characters
 *   into the token (presumably skipping the `ur:` prefix — confirm against the
 *   lexer) and covers exactly the type name.
 */
function parseUr(ur: UR, tokenSpan: Span): ParseResult<Cbor> {
  const typeName = ur.urTypeStr();
  const tag = tagForName(typeName);

  if (tag === undefined) {
    const typeStart = tokenSpan.start + 3;
    return err(PE.unknownUrType(typeName, span(typeStart, typeStart + typeName.length)));
  }

  return ok(cbor({ tag, value: ur.cbor() }));
}
250
+
251
/**
 * Parses the content of a numerically tagged item and its closing parenthesis.
 *
 * Called after the tag-number token has been read: one item is parsed, then
 * the next token must be `ParenthesisClose`.
 *
 * @param tagValue - The numeric tag to apply to the enclosed item.
 * @param lexer - The lexer positioned just after the tag token.
 * @returns The tagged `Cbor` value. Errors: whatever parsing the enclosed item
 *   reported; `unmatchedParentheses` when the input ends or a different token
 *   appears where `)` is required; any other lexer error unchanged.
 */
function parseNumberTag(tagValue: number, lexer: Lexer): ParseResult<Cbor> {
  const content = parseItem(lexer);
  if (!content.ok) {
    return content;
  }

  const closer = expectToken(lexer);

  if (closer.ok) {
    if (closer.value.type === "ParenthesisClose") {
      return ok(cbor({ tag: tagValue, value: content.value }));
    }
    // Some other token is sitting where the closing parenthesis belongs.
    return err(PE.unmatchedParentheses(lexer.span()));
  }

  // Running out of input here means the parenthesis was never closed.
  if (closer.error.type === "UnexpectedEndOfInput") {
    return err(PE.unmatchedParentheses(lexer.span()));
  }
  return closer;
}
271
+
272
/**
 * Parses the content of a name-tagged item and its closing parenthesis, then
 * resolves the tag name to a tag value.
 *
 * The tag name is only resolved after the enclosed item and the closing
 * parenthesis have been read successfully, so syntax errors inside the
 * parentheses are reported before an unknown tag name is.
 *
 * NOTE(review): unlike `parseNumberTag`, running out of input while looking
 * for `)` is returned unchanged rather than converted to
 * `unmatchedParentheses` — confirm this difference is intended.
 *
 * @param name - The tag name as written in the source.
 * @param lexer - The lexer positioned just after the tag-name token.
 * @returns The tagged `Cbor` value. Errors: whatever parsing the enclosed item
 *   or reading the next token reported; `unmatchedParentheses` when a token
 *   other than `)` follows the item; `unknownTagName` when `name` is not
 *   registered.
 */
function parseNameTag(name: string, lexer: Lexer): ParseResult<Cbor> {
  // Capture the span of the tag token before any further tokens are read,
  // trimming its final character so the span covers only the name.
  const tagTokenSpan = lexer.span();
  const tagSpan = span(tagTokenSpan.start, tagTokenSpan.end - 1);

  const content = parseItem(lexer);
  if (!content.ok) {
    return content;
  }

  const closer = expectToken(lexer);
  if (!closer.ok) {
    return closer;
  }

  if (closer.value.type !== "ParenthesisClose") {
    return err(PE.unmatchedParentheses(lexer.span()));
  }

  const tag = tagForName(name);
  if (tag === undefined) {
    return err(PE.unknownTagName(name, tagSpan));
  }

  return ok(cbor({ tag, value: content.value }));
}
295
+
296
/**
 * Parses the elements of an array after its opening bracket has been read.
 *
 * Elements must be separated by single commas. A closing bracket ends the
 * array unless it directly follows a comma, in which case it is parsed as an
 * item and rejected by `parseItemToken`.
 *
 * @param lexer - The lexer positioned just after the opening bracket.
 * @returns The array as a `Cbor` value. Errors: `expectedComma` when two
 *   elements are not separated by a comma; any error from reading a token
 *   (including end of input, which is returned unchanged) or from parsing an
 *   element.
 */
function parseArray(lexer: Lexer): ParseResult<Cbor> {
  const elements: Cbor[] = [];

  // What was read most recently:
  //   "start" - nothing yet (just past the opening bracket)
  //   "item"  - an element, so a comma or the closing bracket must follow
  //   "comma" - a separator, so another element must follow
  let last: "start" | "item" | "comma" = "start";

  for (;;) {
    const next = expectToken(lexer);
    if (!next.ok) {
      return next;
    }
    const token = next.value;

    if (token.type === "BracketClose" && last !== "comma") {
      return ok(cbor(elements));
    }

    if (last === "item") {
      if (token.type === "Comma") {
        last = "comma";
        continue;
      }
      return err(PE.expectedComma(lexer.span()));
    }

    // At this point an element is required; any token that cannot start one
    // is rejected by parseItemToken.
    const element = parseItemToken(token, lexer);
    if (!element.ok) {
      return element;
    }

    elements.push(element.value);
    last = "item";
  }
}
337
+
338
/**
 * Parses the entries of a map after its opening brace has been read.
 *
 * Each entry is `key : value`, and entries must be separated by single
 * commas. A closing brace ends the map unless it directly follows a comma.
 * A key that is already present is rejected as soon as it has been parsed,
 * before its colon and value are read.
 *
 * @param lexer - The lexer positioned just after the opening brace.
 * @returns The map as a `Cbor` value. Errors: `unmatchedBraces` when input
 *   ends where a key, comma or closing brace is expected; `expectedComma`
 *   when two entries are not separated by a comma; `duplicateMapKey` for a
 *   repeated key; `expectedColon` when a key is not followed by a colon;
 *   `expectedMapKey` when a closing brace appears where a value is expected;
 *   otherwise any error from reading a token or parsing a key/value.
 */
function parseMap(lexer: Lexer): ParseResult<Cbor> {
  const entries = new CborMap();

  // What was read most recently:
  //   "start" - nothing yet (just past the opening brace)
  //   "entry" - a full key/value pair, so a comma or closing brace must follow
  //   "comma" - a separator, so another key must follow
  let last: "start" | "entry" | "comma" = "start";

  for (;;) {
    const next = expectToken(lexer);
    if (!next.ok) {
      // End of input at an entry boundary means the brace was never closed.
      if (next.error.type === "UnexpectedEndOfInput") {
        return err(PE.unmatchedBraces(lexer.span()));
      }
      return next;
    }
    const token = next.value;

    if (token.type === "BraceClose" && last !== "comma") {
      return ok(cbor(entries));
    }

    if (last === "entry") {
      if (token.type === "Comma") {
        last = "comma";
        continue;
      }
      return err(PE.expectedComma(lexer.span()));
    }

    // --- Key ---
    const keyResult = parseItemToken(token, lexer);
    if (!keyResult.ok) {
      return keyResult;
    }
    const key = keyResult.value;

    // Sample the span immediately after the key is parsed, before any further
    // tokens are consumed.
    const keySpan = lexer.span();
    if (entries.has(key)) {
      return err(PE.duplicateMapKey(keySpan));
    }

    // --- Colon ---
    const separator = expectToken(lexer);
    if (!separator.ok) {
      return separator;
    }
    if (separator.value.type !== "Colon") {
      return err(PE.expectedColon(lexer.span()));
    }

    // --- Value ---
    const valueResult = parseItem(lexer);
    if (!valueResult.ok) {
      // A closing brace in value position gets its own, more specific error.
      if (valueResult.error.type === "UnexpectedToken") {
        const offending = (valueResult.error as { token: Token }).token;
        if (offending.type === "BraceClose") {
          return err(PE.expectedMapKey(lexer.span()));
        }
      }
      return valueResult;
    }

    entries.set(key, valueResult.value);
    last = "entry";
  }
}