@bcts/dcbor-parse 1.0.0-alpha.23 → 1.0.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +151 -37
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +50 -6
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +50 -6
- package/dist/index.d.mts.map +1 -1
- package/dist/index.iife.js +151 -37
- package/dist/index.iife.js.map +1 -1
- package/dist/index.mjs +151 -37
- package/dist/index.mjs.map +1 -1
- package/package.json +5 -5
- package/src/compose.ts +10 -2
- package/src/error.ts +139 -5
- package/src/index.ts +61 -16
- package/src/parse.ts +29 -11
- package/src/token.ts +127 -54
package/dist/index.mjs
CHANGED
|
@@ -223,7 +223,8 @@ function formatMessage(message, source, range) {
|
|
|
223
223
|
lineNumber++;
|
|
224
224
|
lineStart = idx + 1;
|
|
225
225
|
}
|
|
226
|
-
|
|
226
|
+
let line = source.split("\n")[lineNumber - 1] ?? "";
|
|
227
|
+
if (line.endsWith("\r")) line = line.slice(0, -1);
|
|
227
228
|
const column = Math.max(0, start - lineStart);
|
|
228
229
|
const underlineLen = Math.max(1, end - start);
|
|
229
230
|
const caret = " ".repeat(column) + "^".repeat(underlineLen);
|
|
@@ -311,8 +312,83 @@ function unwrapErr(result) {
|
|
|
311
312
|
if (!result.ok) return result.error;
|
|
312
313
|
throw new Error("Called unwrapErr on an Ok result");
|
|
313
314
|
}
|
|
315
|
+
/**
 * Renders a {@link Token} in the style of Rust's `#[derive(Debug)]`
 * output for the corresponding enum variant, so that the
 * "Unexpected token …" message (Rust: `#[error("Unexpected token {0:?}")]`
 * on `Error::UnexpectedToken(Box<Token>, Span)`) reads the same as in Rust.
 *
 * Output shapes produced by this function:
 *
 * - Payload-free tokens (`BraceOpen`, `Comma`, `Null`, `Unit`, `NaN`, …)
 *   print as the bare variant name.
 * - `Bool`, `Number`, `String`, `TagName` and `KnownValueName` print as
 *   `Variant(value)`, e.g. `Bool(true)`, `Number(3.14)`,
 *   `TagName("date")`, `KnownValueName("isA")`. String payloads are
 *   quoted with `JSON.stringify`, so a `String` token whose value still
 *   carries its source quotes prints as `String("\"foo\"")`.
 * - `ByteStringHex`, `ByteStringBase64`, `DateLiteral`, `TagValue`,
 *   `KnownValueNumber` and `UR` print with an `Ok(...)` wrapper around
 *   the payload, e.g. `TagValue(Ok(1234))`, `KnownValueNumber(Ok(42))`
 *   (presumably because the Rust variants hold a `Result` — confirm
 *   against the Rust token definition).
 *
 * NOTE(review): `JSON.stringify` only approximates Rust's string `Debug`
 * escaping; control characters are escaped with a different syntax.
 *
 * @param token - Token object with a `type` discriminant and, for
 *   payload-carrying variants, a `value`.
 * @returns The debug rendering. An unrecognized `type` falls back to the
 *   stringified type name instead of `undefined`.
 */
function tokenDebugString(token) {
	switch (token.type) {
		case "Bool": return `Bool(${token.value ? "true" : "false"})`;
		// Payload-free variants: the discriminant is the whole rendering.
		case "BraceOpen":
		case "BraceClose":
		case "BracketOpen":
		case "BracketClose":
		case "ParenthesisOpen":
		case "ParenthesisClose":
		case "Colon":
		case "Comma":
		case "Null":
		case "NaN":
		case "Infinity":
		case "NegInfinity":
		case "Unit": return token.type;
		case "ByteStringHex":
		case "ByteStringBase64": return `${token.type}(Ok(${formatBytesDebug(token.value)}))`;
		case "DateLiteral": return `DateLiteral(Ok(${String(token.value)}))`;
		case "Number": return `Number(${formatNumberDebug(token.value)})`;
		case "String":
		case "TagName":
		case "KnownValueName": return `${token.type}(${JSON.stringify(token.value)})`;
		case "TagValue":
		case "KnownValueNumber": return `${token.type}(Ok(${tagOrKnownValueDebug(token.value)}))`;
		case "UR": return `UR(Ok(${token.value.string()}))`;
		// Defensive: never let "undefined" leak into an error message.
		default: return String(token.type);
	}
}
|
|
361
|
+
/**
 * Renders a byte sequence the way Rust's `Debug` (`{:?}`) renders a
 * `Vec<u8>`: a bracketed, comma-separated list of *decimal* element
 * values, e.g. `[104, 101, 108, 108, 111]` for the bytes of "hello".
 *
 * Rust only prints hexadecimal elements for the `{:x?}` / `{:#x?}`
 * specifiers, which the plain `{:?}` error formatter does not use, so
 * emitting `0x..` here would diverge from the Rust message text.
 *
 * @param bytes - Iterable of byte values (e.g. a `Uint8Array`).
 * @returns The bracketed decimal list; `[]` for an empty input.
 */
function formatBytesDebug(bytes) {
	// Array.from (not TypedArray#map) so the mapped strings are kept as-is.
	return `[${Array.from(bytes, (b) => String(b)).join(", ")}]`;
}
|
|
370
|
+
/**
 * Renders a JS `number` the way Rust's `f64` `Debug` prints it:
 *
 * - `NaN`, `inf` and `-inf` for non-finite values.
 * - Negative zero keeps its sign: `-0.0`.
 * - Magnitudes `>= 1e16`, or non-zero magnitudes `< 1e-4`, use the
 *   shortest exponent form without a `+` sign (`1e16`, `1.5e-7`).
 * - Everything else is plain decimal with at least one fractional
 *   digit, so integral values get a trailing `.0` (`42.0`).
 *
 * `Number` tokens rarely reach error messages, but mirroring the Rust
 * convention keeps any such text consistent with the Rust output.
 *
 * @param n - The number to render.
 * @returns The Rust-`Debug`-style text for `n`.
 */
function formatNumberDebug(n) {
	if (Number.isNaN(n)) return "NaN";
	if (!Number.isFinite(n)) return n > 0 ? "inf" : "-inf";
	// String(-0) is "0", which would silently drop the sign.
	if (Object.is(n, -0)) return "-0.0";
	const magnitude = Math.abs(n);
	if (magnitude >= 1e16 || (magnitude !== 0 && magnitude < 1e-4)) {
		// toExponential() yields the shortest round-trip digits; JS writes
		// "e+16" where Rust writes "e16".
		return n.toExponential().replace("e+", "e");
	}
	// In this range String(n) is always plain decimal notation.
	const text = String(n);
	return Number.isInteger(n) ? `${text}.0` : text;
}
|
|
384
|
+
/**
 * Renders a tag / known-value payload as a bare digit sequence, the way
 * Rust's `Debug` prints a `u64`.
 *
 * `String()` already formats a `bigint` without the trailing `n` of its
 * literal syntax (identical to `bigint.toString()`), so `number` and
 * `bigint` payloads need no separate handling.
 *
 * @param value - The payload, either a `number` or a `bigint`.
 * @returns The decimal text of `value`.
 */
function tagOrKnownValueDebug(value) {
	return String(value);
}
|
|
317
393
|
|
|
318
394
|
//#endregion
|
|
@@ -486,11 +562,14 @@ var Lexer = class {
|
|
|
486
562
|
this._position++;
|
|
487
563
|
continue;
|
|
488
564
|
}
|
|
489
|
-
if (ch === "/"
|
|
490
|
-
this._position
|
|
491
|
-
while (
|
|
492
|
-
if (
|
|
493
|
-
|
|
565
|
+
if (ch === "/") {
|
|
566
|
+
let scan = this._position + 1;
|
|
567
|
+
while (scan < this._source.length && this._source[scan] !== "/") scan++;
|
|
568
|
+
if (scan < this._source.length) {
|
|
569
|
+
this._position = scan + 1;
|
|
570
|
+
continue;
|
|
571
|
+
}
|
|
572
|
+
break;
|
|
494
573
|
}
|
|
495
574
|
if (ch === "#") {
|
|
496
575
|
while (this._position < this._source.length && this._source[this._position] !== "\n") this._position++;
|
|
@@ -499,23 +578,33 @@ var Lexer = class {
|
|
|
499
578
|
break;
|
|
500
579
|
}
|
|
501
580
|
}
|
|
581
|
+
/**
|
|
582
|
+
* Matches reserved keywords: `true`, `false`, `null`, `NaN`,
|
|
583
|
+
* `Infinity`, `-Infinity`, `Unit`.
|
|
584
|
+
*
|
|
585
|
+
* Mirrors Rust's `Logos` `#[token(...)]` matcher
|
|
586
|
+
* (`bc-dcbor-parse-rust/src/token.rs:12-50, 164`), which is greedy
|
|
587
|
+
* and emits the keyword token *as soon as the literal matches* —
|
|
588
|
+
* subsequent characters become a separate (likely unrecognized) token
|
|
589
|
+
* stream. So input like `truex` lexes as `Bool(true)` followed by an
|
|
590
|
+
* unrecognized run on `x`. Earlier revisions of this port enforced an
|
|
591
|
+
* identifier boundary check (`!_isIdentifierChar(nextChar)`) and
|
|
592
|
+
* rejected the whole prefix as a single `UnrecognizedToken`, which
|
|
593
|
+
* broke span/variant parity with Rust.
|
|
594
|
+
*/
|
|
502
595
|
_tryMatchKeyword() {
|
|
503
596
|
const keywords = [
|
|
597
|
+
["-Infinity", token.negInfinity()],
|
|
504
598
|
["true", token.bool(true)],
|
|
505
599
|
["false", token.bool(false)],
|
|
506
600
|
["null", token.null()],
|
|
507
601
|
["NaN", token.nan()],
|
|
508
602
|
["Infinity", token.infinity()],
|
|
509
|
-
["-Infinity", token.negInfinity()],
|
|
510
603
|
["Unit", token.unit()]
|
|
511
604
|
];
|
|
512
605
|
for (const [keyword, tok] of keywords) if (this._matchLiteral(keyword)) {
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
this._tokenEnd = this._position;
|
|
516
|
-
return ok(tok);
|
|
517
|
-
}
|
|
518
|
-
this._position = this._tokenStart;
|
|
606
|
+
this._tokenEnd = this._position;
|
|
607
|
+
return ok(tok);
|
|
519
608
|
}
|
|
520
609
|
}
|
|
521
610
|
_tryMatchDateLiteral() {
|
|
@@ -544,9 +633,9 @@ var Lexer = class {
|
|
|
544
633
|
if (this._source[this._position + numStr.length] === "(" && !numStr.includes(".") && !numStr.includes("e") && !numStr.includes("E") && !numStr.startsWith("-")) {
|
|
545
634
|
this._position += numStr.length + 1;
|
|
546
635
|
this._tokenEnd = this._position;
|
|
547
|
-
const
|
|
548
|
-
if (
|
|
549
|
-
return ok(token.tagValue(
|
|
636
|
+
const parsed = parseUsize64(numStr);
|
|
637
|
+
if (parsed === void 0) return err(parseError.invalidTagValue(numStr, span(this._tokenStart, this._tokenStart + numStr.length)));
|
|
638
|
+
return ok(token.tagValue(parsed));
|
|
550
639
|
}
|
|
551
640
|
this._position += numStr.length;
|
|
552
641
|
this._tokenEnd = this._position;
|
|
@@ -578,15 +667,6 @@ var Lexer = class {
|
|
|
578
667
|
return ok(token.string(fullMatch));
|
|
579
668
|
}
|
|
580
669
|
this._position++;
|
|
581
|
-
while (this._position < this._source.length) {
|
|
582
|
-
const ch = this._source[this._position];
|
|
583
|
-
if (ch === "\"" || ch === "\n") {
|
|
584
|
-
if (ch === "\"") this._position++;
|
|
585
|
-
break;
|
|
586
|
-
}
|
|
587
|
-
if (ch === "\\") this._position += 2;
|
|
588
|
-
else this._position++;
|
|
589
|
-
}
|
|
590
670
|
this._tokenEnd = this._position;
|
|
591
671
|
return err(parseError.unrecognizedToken(this.span()));
|
|
592
672
|
}
|
|
@@ -643,8 +723,8 @@ var Lexer = class {
|
|
|
643
723
|
const numStr = match[1];
|
|
644
724
|
this._position += fullMatch.length;
|
|
645
725
|
this._tokenEnd = this._position;
|
|
646
|
-
const value =
|
|
647
|
-
if (
|
|
726
|
+
const value = parseUsize64(numStr);
|
|
727
|
+
if (value === void 0) return err(parseError.invalidKnownValue(numStr, span(this._tokenStart + 1, this._tokenEnd - 1)));
|
|
648
728
|
return ok(token.knownValueNumber(value));
|
|
649
729
|
}
|
|
650
730
|
match = /^'([a-zA-Z_][a-zA-Z0-9_-]*)'/.exec(remaining);
|
|
@@ -656,8 +736,6 @@ var Lexer = class {
|
|
|
656
736
|
return ok(token.knownValueName(name));
|
|
657
737
|
}
|
|
658
738
|
this._position++;
|
|
659
|
-
while (this._position < this._source.length && this._source[this._position] !== "'") this._position++;
|
|
660
|
-
if (this._position < this._source.length) this._position++;
|
|
661
739
|
this._tokenEnd = this._position;
|
|
662
740
|
return err(parseError.unrecognizedToken(this.span()));
|
|
663
741
|
}
|
|
@@ -703,11 +781,39 @@ var Lexer = class {
|
|
|
703
781
|
}
|
|
704
782
|
return false;
|
|
705
783
|
}
|
|
706
|
-
_isIdentifierChar(ch) {
|
|
707
|
-
return /[a-zA-Z0-9_-]/.test(ch);
|
|
708
|
-
}
|
|
709
784
|
};
|
|
710
785
|
/**
 * Largest value representable as an unsigned 64-bit integer (2^64 - 1).
 */
const MAX_U64 = (1n << 64n) - 1n;
/**
 * Strictly parses a non-negative decimal integer string in the range
 * `[0, 2^64 - 1]`, mirroring how Rust's `u64` parsing rejects
 * out-of-range input.
 *
 * - Empty input, signs, or any non-ASCII-digit character → `undefined`.
 * - Leading zeros are accepted (`"00042"` → `42`).
 * - Values up to `Number.MAX_SAFE_INTEGER` are returned as plain
 *   `number`s, so common tag / known values never surface as `bigint`.
 * - Values in `(2^53 - 1, 2^64 - 1]` are returned as `bigint` to avoid
 *   precision loss.
 * - Values greater than `2^64 - 1` → `undefined`.
 *
 * @param s - Candidate digit string.
 * @returns The parsed value as `number` or `bigint`, or `undefined` when
 *   `s` is not a valid `u64` literal.
 */
function parseUsize64(s) {
	// One check covers both the empty string and any non-digit character.
	if (!/^[0-9]+$/.test(s)) return void 0;
	// Drop leading zeros but keep one digit so "000" still parses as 0.
	const significant = s.replace(/^0+(?=[0-9])/, "");
	// 2^64 - 1 has 20 digits; rejecting longer input here avoids building
	// an arbitrarily large BigInt from untrusted source text.
	if (significant.length > 20) return void 0;
	const value = BigInt(significant);
	if (value > MAX_U64) return void 0;
	return value <= BigInt(Number.MAX_SAFE_INTEGER) ? Number(value) : value;
}
|
|
816
|
+
/**
|
|
711
817
|
* Converts a hex string to bytes.
|
|
712
818
|
*/
|
|
713
819
|
function hexToBytes(hex) {
|
|
@@ -993,12 +1099,12 @@ function parseMap(lexer) {
|
|
|
993
1099
|
const keySpan = lexer.span();
|
|
994
1100
|
if (map.has(key)) return err(parseError.duplicateMapKey(keySpan));
|
|
995
1101
|
const colonResult = expectToken(lexer);
|
|
996
|
-
if (!colonResult.ok) return
|
|
997
|
-
if (colonResult.value.type !== "Colon") return err(parseError.expectedColon(lexer.span()));
|
|
1102
|
+
if (!colonResult.ok || colonResult.value.type !== "Colon") return err(parseError.expectedColon(lexer.span()));
|
|
998
1103
|
const valueResult = parseItem(lexer);
|
|
999
1104
|
if (!valueResult.ok) {
|
|
1000
1105
|
if (valueResult.error.type === "UnexpectedToken") {
|
|
1001
|
-
|
|
1106
|
+
const unexpected = valueResult.error;
|
|
1107
|
+
if (unexpected.token.type === "BraceClose") return err(parseError.expectedMapKey(unexpected.span));
|
|
1002
1108
|
}
|
|
1003
1109
|
return valueResult;
|
|
1004
1110
|
}
|
|
@@ -1037,12 +1143,20 @@ const composeError = {
|
|
|
1037
1143
|
};
|
|
1038
1144
|
/**
 * Produces the human-readable message for a compose error.
 *
 * Mirrors Rust `Error::Display` (`bc-dcbor-parse-rust/src/compose.rs`).
 * The `ParseError` arm there is `#[error("Invalid CBOR item: {0}")]`,
 * which formats the wrapped error through its `Display` impl rather than
 * its variant name: `Error::ParseError(Error::EmptyInput)` reads
 * `"Invalid CBOR item: Empty input"`, not `"Invalid CBOR item: EmptyInput"`.
 * The wrapped error is therefore rendered with {@link errorMessage}.
 *
 * @param error - Compose error with a `type` discriminant; `ParseError`
 *   additionally carries the wrapped parse error in `error.error`.
 * @returns The message text, or `undefined` for an unrecognized `type`.
 */
function composeErrorMessage(error) {
	const kind = error.type;
	if (kind === "OddMapLength") return "Invalid odd map length";
	if (kind === "DuplicateMapKey") return "Duplicate map key";
	if (kind === "ParseError") return `Invalid CBOR item: ${errorMessage(error.error)}`;
}
|
|
1048
1162
|
/**
|