@bcts/dcbor-parse 1.0.0-alpha.23 → 1.0.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +151 -37
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +50 -6
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +50 -6
- package/dist/index.d.mts.map +1 -1
- package/dist/index.iife.js +151 -37
- package/dist/index.iife.js.map +1 -1
- package/dist/index.mjs +151 -37
- package/dist/index.mjs.map +1 -1
- package/package.json +5 -5
- package/src/compose.ts +10 -2
- package/src/error.ts +139 -5
- package/src/index.ts +61 -16
- package/src/parse.ts +29 -11
- package/src/token.ts +127 -54
package/dist/index.cjs
CHANGED
|
@@ -224,7 +224,8 @@ function formatMessage(message, source, range) {
|
|
|
224
224
|
lineNumber++;
|
|
225
225
|
lineStart = idx + 1;
|
|
226
226
|
}
|
|
227
|
-
|
|
227
|
+
let line = source.split("\n")[lineNumber - 1] ?? "";
|
|
228
|
+
if (line.endsWith("\r")) line = line.slice(0, -1);
|
|
228
229
|
const column = Math.max(0, start - lineStart);
|
|
229
230
|
const underlineLen = Math.max(1, end - start);
|
|
230
231
|
const caret = " ".repeat(column) + "^".repeat(underlineLen);
|
|
@@ -312,8 +313,83 @@ function unwrapErr(result) {
|
|
|
312
313
|
if (!result.ok) return result.error;
|
|
313
314
|
throw new Error("Called unwrapErr on an Ok result");
|
|
314
315
|
}
|
|
316
|
+
/**
 * Renders a {@link Token} in the style of Rust's `#[derive(Debug)]`
 * output for the matching enum variant, so that "Unexpected token …"
 * messages can line up with the Rust implementation.
 *
 * - Variant-only tokens (`BraceOpen`, `Comma`, `Null`, `Unit`, `NaN`,
 *   …) print as the bare variant name.
 * - `Bool`, `Number`, `String`, `TagName` and `KnownValueName` print as
 *   `Variant(value)`: `Bool(true)`, `Number(3.14)`, `TagName("date")`,
 *   `KnownValueName("isA")`. String payloads go through
 *   `JSON.stringify`, so a `String` token whose value still carries its
 *   source quotes prints them escaped: `String("\"foo\"")`.
 * - `ByteStringHex`, `ByteStringBase64`, `DateLiteral`, `TagValue`,
 *   `KnownValueNumber` and `UR` wrap their payload in `Ok(…)`, e.g.
 *   `TagValue(Ok(1234))`, `KnownValueNumber(Ok(42))` — presumably
 *   because the Rust variants carry a `Result`; confirm against the
 *   Rust token definition.
 * - Any other token type falls back to its bare type name so the
 *   caller never interpolates `undefined` into an error message.
 *
 * @param {Token} token - Lexer token to render.
 * @returns {string} Debug-style rendering of the token.
 */
function tokenDebugString(token) {
	switch (token.type) {
		// Payload-free variants: the variant name is the whole rendering.
		case "BraceOpen":
		case "BraceClose":
		case "BracketOpen":
		case "BracketClose":
		case "ParenthesisOpen":
		case "ParenthesisClose":
		case "Colon":
		case "Comma":
		case "Null":
		case "NaN":
		case "Infinity":
		case "NegInfinity":
		case "Unit": return token.type;
		case "Bool": return `Bool(${token.value ? "true" : "false"})`;
		case "ByteStringHex": return `ByteStringHex(Ok(${formatBytesDebug(token.value)}))`;
		case "ByteStringBase64": return `ByteStringBase64(Ok(${formatBytesDebug(token.value)}))`;
		case "DateLiteral": return `DateLiteral(Ok(${String(token.value)}))`;
		case "Number": return `Number(${formatNumberDebug(token.value)})`;
		case "String": return `String(${JSON.stringify(token.value)})`;
		case "TagValue": return `TagValue(Ok(${tagOrKnownValueDebug(token.value)}))`;
		case "TagName": return `TagName(${JSON.stringify(token.value)})`;
		case "KnownValueNumber": return `KnownValueNumber(Ok(${tagOrKnownValueDebug(token.value)}))`;
		case "KnownValueName": return `KnownValueName(${JSON.stringify(token.value)})`;
		case "UR": return `UR(Ok(${token.value.string()}))`;
		// Defensive fallback for token types this switch does not know.
		default: return String(token.type);
	}
}
|
|
362
|
+
/**
 * Renders a byte sequence the way Rust's plain `Debug` (`{:?}`) renders
 * a `Vec<u8>`: a bracketed, comma-separated list of *decimal* values,
 * e.g. `[104, 101, 108, 108, 111]` for the bytes of "hello".
 *
 * Rust only prints bytes as hex when the `{:x?}` / `{:#x?}` flags are
 * used; plain `{:?}` prints decimal.
 *
 * @param {Iterable<number>} bytes - Byte values (array or `Uint8Array`).
 * @returns {string} Debug-style list, `[]` for an empty input.
 */
function formatBytesDebug(bytes) {
	// Array.from handles plain arrays and typed arrays uniformly.
	return `[${Array.from(bytes, (b) => String(b)).join(", ")}]`;
}
|
|
371
|
+
/**
 * Renders a JS `number` the way Rust's `f64` `Debug` prints it:
 *
 * - `NaN`, `inf`, `-inf` for non-finite values.
 * - Negative zero keeps its sign: `-0.0`.
 * - Magnitudes `>= 1e16`, or non-zero magnitudes `< 1e-4`, use
 *   shortest exponent notation without a `+` sign or trailing `.0`
 *   (`1e21`, `1.5e-7`).
 * - Everything else uses plain decimal notation with at least one
 *   fractional digit (`42.0`, `3.14`).
 *
 * @param {number} n - Value to render.
 * @returns {string} Rust-`Debug`-style text for `n`.
 */
function formatNumberDebug(n) {
	if (Number.isNaN(n)) return "NaN";
	if (!Number.isFinite(n)) return n > 0 ? "inf" : "-inf";
	// String(-0) is "0", which would silently drop the sign.
	if (Object.is(n, -0)) return "-0.0";
	const magnitude = Math.abs(n);
	if (magnitude >= 1e16 || (magnitude !== 0 && magnitude < 1e-4)) {
		// toExponential() with no argument emits the shortest round-trip
		// digits; JS writes "e+21" where Rust writes "e21".
		return n.toExponential().replace("e+", "e");
	}
	// In this range String(n) is always plain decimal notation.
	const text = String(n);
	return Number.isInteger(n) ? `${text}.0` : text;
}
|
|
385
|
+
/**
 * Produces the decimal text of a tag or known-value number, matching
 * how Rust's `Debug` prints a `u64`: digits only, with no trailing `n`
 * even when the value arrives as a `bigint`.
 *
 * @param {number | bigint} value - Tag or known-value number.
 * @returns {string} The value as a bare digit sequence.
 */
function tagOrKnownValueDebug(value) {
	if (typeof value === "bigint") {
		return value.toString();
	}
	return String(value);
}
|
|
318
394
|
|
|
319
395
|
//#endregion
|
|
@@ -487,11 +563,14 @@ var Lexer = class {
|
|
|
487
563
|
this._position++;
|
|
488
564
|
continue;
|
|
489
565
|
}
|
|
490
|
-
if (ch === "/"
|
|
491
|
-
this._position
|
|
492
|
-
while (
|
|
493
|
-
if (
|
|
494
|
-
|
|
566
|
+
if (ch === "/") {
|
|
567
|
+
let scan = this._position + 1;
|
|
568
|
+
while (scan < this._source.length && this._source[scan] !== "/") scan++;
|
|
569
|
+
if (scan < this._source.length) {
|
|
570
|
+
this._position = scan + 1;
|
|
571
|
+
continue;
|
|
572
|
+
}
|
|
573
|
+
break;
|
|
495
574
|
}
|
|
496
575
|
if (ch === "#") {
|
|
497
576
|
while (this._position < this._source.length && this._source[this._position] !== "\n") this._position++;
|
|
@@ -500,23 +579,33 @@ var Lexer = class {
|
|
|
500
579
|
break;
|
|
501
580
|
}
|
|
502
581
|
}
|
|
582
|
+
/**
|
|
583
|
+
* Matches reserved keywords: `true`, `false`, `null`, `NaN`,
|
|
584
|
+
* `Infinity`, `-Infinity`, `Unit`.
|
|
585
|
+
*
|
|
586
|
+
* Mirrors Rust's `Logos` `#[token(...)]` matcher
|
|
587
|
+
* (`bc-dcbor-parse-rust/src/token.rs:12-50, 164`), which is greedy
|
|
588
|
+
* and emits the keyword token *as soon as the literal matches* —
|
|
589
|
+
* subsequent characters become a separate (likely unrecognized) token
|
|
590
|
+
* stream. So input like `truex` lexes as `Bool(true)` followed by an
|
|
591
|
+
* unrecognized run on `x`. Earlier revisions of this port enforced an
|
|
592
|
+
* identifier boundary check (`!_isIdentifierChar(nextChar)`) and
|
|
593
|
+
* rejected the whole prefix as a single `UnrecognizedToken`, which
|
|
594
|
+
* broke span/variant parity with Rust.
|
|
595
|
+
*/
|
|
503
596
|
_tryMatchKeyword() {
|
|
504
597
|
const keywords = [
|
|
598
|
+
["-Infinity", token.negInfinity()],
|
|
505
599
|
["true", token.bool(true)],
|
|
506
600
|
["false", token.bool(false)],
|
|
507
601
|
["null", token.null()],
|
|
508
602
|
["NaN", token.nan()],
|
|
509
603
|
["Infinity", token.infinity()],
|
|
510
|
-
["-Infinity", token.negInfinity()],
|
|
511
604
|
["Unit", token.unit()]
|
|
512
605
|
];
|
|
513
606
|
for (const [keyword, tok] of keywords) if (this._matchLiteral(keyword)) {
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
this._tokenEnd = this._position;
|
|
517
|
-
return ok(tok);
|
|
518
|
-
}
|
|
519
|
-
this._position = this._tokenStart;
|
|
607
|
+
this._tokenEnd = this._position;
|
|
608
|
+
return ok(tok);
|
|
520
609
|
}
|
|
521
610
|
}
|
|
522
611
|
_tryMatchDateLiteral() {
|
|
@@ -545,9 +634,9 @@ var Lexer = class {
|
|
|
545
634
|
if (this._source[this._position + numStr.length] === "(" && !numStr.includes(".") && !numStr.includes("e") && !numStr.includes("E") && !numStr.startsWith("-")) {
|
|
546
635
|
this._position += numStr.length + 1;
|
|
547
636
|
this._tokenEnd = this._position;
|
|
548
|
-
const
|
|
549
|
-
if (
|
|
550
|
-
return ok(token.tagValue(
|
|
637
|
+
const parsed = parseUsize64(numStr);
|
|
638
|
+
if (parsed === void 0) return err(parseError.invalidTagValue(numStr, span(this._tokenStart, this._tokenStart + numStr.length)));
|
|
639
|
+
return ok(token.tagValue(parsed));
|
|
551
640
|
}
|
|
552
641
|
this._position += numStr.length;
|
|
553
642
|
this._tokenEnd = this._position;
|
|
@@ -579,15 +668,6 @@ var Lexer = class {
|
|
|
579
668
|
return ok(token.string(fullMatch));
|
|
580
669
|
}
|
|
581
670
|
this._position++;
|
|
582
|
-
while (this._position < this._source.length) {
|
|
583
|
-
const ch = this._source[this._position];
|
|
584
|
-
if (ch === "\"" || ch === "\n") {
|
|
585
|
-
if (ch === "\"") this._position++;
|
|
586
|
-
break;
|
|
587
|
-
}
|
|
588
|
-
if (ch === "\\") this._position += 2;
|
|
589
|
-
else this._position++;
|
|
590
|
-
}
|
|
591
671
|
this._tokenEnd = this._position;
|
|
592
672
|
return err(parseError.unrecognizedToken(this.span()));
|
|
593
673
|
}
|
|
@@ -644,8 +724,8 @@ var Lexer = class {
|
|
|
644
724
|
const numStr = match[1];
|
|
645
725
|
this._position += fullMatch.length;
|
|
646
726
|
this._tokenEnd = this._position;
|
|
647
|
-
const value =
|
|
648
|
-
if (
|
|
727
|
+
const value = parseUsize64(numStr);
|
|
728
|
+
if (value === void 0) return err(parseError.invalidKnownValue(numStr, span(this._tokenStart + 1, this._tokenEnd - 1)));
|
|
649
729
|
return ok(token.knownValueNumber(value));
|
|
650
730
|
}
|
|
651
731
|
match = /^'([a-zA-Z_][a-zA-Z0-9_-]*)'/.exec(remaining);
|
|
@@ -657,8 +737,6 @@ var Lexer = class {
|
|
|
657
737
|
return ok(token.knownValueName(name));
|
|
658
738
|
}
|
|
659
739
|
this._position++;
|
|
660
|
-
while (this._position < this._source.length && this._source[this._position] !== "'") this._position++;
|
|
661
|
-
if (this._position < this._source.length) this._position++;
|
|
662
740
|
this._tokenEnd = this._position;
|
|
663
741
|
return err(parseError.unrecognizedToken(this.span()));
|
|
664
742
|
}
|
|
@@ -704,11 +782,39 @@ var Lexer = class {
|
|
|
704
782
|
}
|
|
705
783
|
return false;
|
|
706
784
|
}
|
|
707
|
-
_isIdentifierChar(ch) {
|
|
708
|
-
return /[a-zA-Z0-9_-]/.test(ch);
|
|
709
|
-
}
|
|
710
785
|
};
|
|
711
786
|
/** Largest value representable as a Rust `u64`: `2^64 - 1`. */
const MAX_U64 = (1n << 64n) - 1n;
/**
 * Strictly parses a non-negative decimal integer string in the range
 * `[0, 2^64 - 1]`.
 *
 * - Empty input, signs, whitespace or any non-digit character →
 *   `undefined`.
 * - Values up to `Number.MAX_SAFE_INTEGER` are returned as plain
 *   `number`s, so callers in the common case (tag values like `40000`,
 *   known values like `1`) never see a `bigint`.
 * - Values in `(2^53 - 1, 2^64 - 1]` are returned as `bigint` so no
 *   precision is lost.
 * - Values greater than `2^64 - 1` → `undefined`, so this parser never
 *   produces a number outside the `u64` domain.
 *
 * @param {string} s - Candidate digit string.
 * @returns {number | bigint | undefined} Parsed value, or `undefined`
 *   when `s` is not a valid `u64` literal.
 */
function parseUsize64(s) {
	const digits = String(s);
	// Digits only. This also rejects the empty string and any sign, so
	// the BigInt conversion below cannot throw or go negative.
	if (!/^\d+$/.test(digits)) return undefined;
	// 2^64 - 1 has 20 digits; anything longer (ignoring leading zeros)
	// is out of range, so skip converting arbitrarily long input.
	if (digits.replace(/^0+(?=\d)/, "").length > 20) return undefined;
	const value = BigInt(digits);
	if (value > MAX_U64) return undefined;
	return value <= BigInt(Number.MAX_SAFE_INTEGER) ? Number(value) : value;
}
|
|
817
|
+
/**
|
|
712
818
|
* Converts a hex string to bytes.
|
|
713
819
|
*/
|
|
714
820
|
function hexToBytes(hex) {
|
|
@@ -994,12 +1100,12 @@ function parseMap(lexer) {
|
|
|
994
1100
|
const keySpan = lexer.span();
|
|
995
1101
|
if (map.has(key)) return err(parseError.duplicateMapKey(keySpan));
|
|
996
1102
|
const colonResult = expectToken(lexer);
|
|
997
|
-
if (!colonResult.ok) return
|
|
998
|
-
if (colonResult.value.type !== "Colon") return err(parseError.expectedColon(lexer.span()));
|
|
1103
|
+
if (!colonResult.ok || colonResult.value.type !== "Colon") return err(parseError.expectedColon(lexer.span()));
|
|
999
1104
|
const valueResult = parseItem(lexer);
|
|
1000
1105
|
if (!valueResult.ok) {
|
|
1001
1106
|
if (valueResult.error.type === "UnexpectedToken") {
|
|
1002
|
-
|
|
1107
|
+
const unexpected = valueResult.error;
|
|
1108
|
+
if (unexpected.token.type === "BraceClose") return err(parseError.expectedMapKey(unexpected.span));
|
|
1003
1109
|
}
|
|
1004
1110
|
return valueResult;
|
|
1005
1111
|
}
|
|
@@ -1038,12 +1144,20 @@ const composeError = {
|
|
|
1038
1144
|
};
|
|
1039
1145
|
/**
 * Returns the human-readable message for a compose error.
 *
 * Mirrors Rust `Error::Display` (`bc-dcbor-parse-rust/src/compose.rs`):
 * the `ParseError` arm is `#[error("Invalid CBOR item: {0}")]`, which
 * formats the wrapped error through its `Display` impl rather than its
 * variant name. A wrapped `EmptyInput` therefore reads
 * `"Invalid CBOR item: Empty input"`, not
 * `"Invalid CBOR item: EmptyInput"`. The wrapped error's text comes
 * from {@link errorMessage}.
 *
 * @param error - Compose error carrying a `type` discriminant and, for
 *   `ParseError`, the wrapped parse error in `error.error`.
 * @returns The message string; `undefined` for an unrecognized `type`.
 */
function composeErrorMessage(error) {
	const kind = error.type;
	if (kind === "OddMapLength") return "Invalid odd map length";
	if (kind === "DuplicateMapKey") return "Duplicate map key";
	if (kind === "ParseError") {
		const inner = errorMessage(error.error);
		return `Invalid CBOR item: ${inner}`;
	}
}
|
|
1049
1163
|
/**
|