@bcts/dcbor-parse 1.0.0-alpha.22 → 1.0.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -224,7 +224,8 @@ function formatMessage(message, source, range) {
224
224
  lineNumber++;
225
225
  lineStart = idx + 1;
226
226
  }
227
- const line = source.split("\n")[lineNumber - 1] ?? "";
227
+ let line = source.split("\n")[lineNumber - 1] ?? "";
228
+ if (line.endsWith("\r")) line = line.slice(0, -1);
228
229
  const column = Math.max(0, start - lineStart);
229
230
  const underlineLen = Math.max(1, end - start);
230
231
  const caret = " ".repeat(column) + "^".repeat(underlineLen);
@@ -312,8 +313,83 @@ function unwrapErr(result) {
312
313
  if (!result.ok) return result.error;
313
314
  throw new Error("Called unwrapErr on an Ok result");
314
315
  }
316
+ /**
317
+ * Renders a {@link Token} the way Rust's
318
+ * `#[derive(Debug)]` on the corresponding enum variant would:
319
+ *
320
+ * - Variant-only tokens (`BraceOpen`, `Comma`, `Null`, `Unit`, `NaN`,
321
+ * …) print as the bare variant name.
322
+ * - Variant-with-value tokens print as `Variant(value)` where `value`
323
+ * uses Rust's `Debug` form for the payload type:
324
+ * `Bool(true)`, `Number(3.14)`, `String("foo")` (with the inner
325
+ * double quotes preserved — TS keeps them on the slice anyway),
326
+ * `TagValue(Ok(1234))`, `KnownValueNumber(Ok(42))`, `TagName("date")`,
327
+ * `KnownValueName("isA")`, `DateLiteral(Ok(2023-02-08T15:30:45.000Z))`,
328
+ * etc.
329
+ *
330
+ * Mirrors Rust's `Error::UnexpectedToken(Box<Token>, Span)` formatter
331
+ * `#[error("Unexpected token {0:?}")]` so error messages stay
332
+ * byte-identical to Rust.
333
+ */
315
334
  function tokenDebugString(token) {
316
- return JSON.stringify(token);
335
+ switch (token.type) {
336
+ case "Bool": return `Bool(${token.value ? "true" : "false"})`;
337
+ case "BraceOpen": return "BraceOpen";
338
+ case "BraceClose": return "BraceClose";
339
+ case "BracketOpen": return "BracketOpen";
340
+ case "BracketClose": return "BracketClose";
341
+ case "ParenthesisOpen": return "ParenthesisOpen";
342
+ case "ParenthesisClose": return "ParenthesisClose";
343
+ case "Colon": return "Colon";
344
+ case "Comma": return "Comma";
345
+ case "Null": return "Null";
346
+ case "NaN": return "NaN";
347
+ case "Infinity": return "Infinity";
348
+ case "NegInfinity": return "NegInfinity";
349
+ case "Unit": return "Unit";
350
+ case "ByteStringHex": return `ByteStringHex(Ok(${formatBytesDebug(token.value)}))`;
351
+ case "ByteStringBase64": return `ByteStringBase64(Ok(${formatBytesDebug(token.value)}))`;
352
+ case "DateLiteral": return `DateLiteral(Ok(${String(token.value)}))`;
353
+ case "Number": return `Number(${formatNumberDebug(token.value)})`;
354
+ case "String": return `String(${JSON.stringify(token.value)})`;
355
+ case "TagValue": return `TagValue(Ok(${tagOrKnownValueDebug(token.value)}))`;
356
+ case "TagName": return `TagName(${JSON.stringify(token.value)})`;
357
+ case "KnownValueNumber": return `KnownValueNumber(Ok(${tagOrKnownValueDebug(token.value)}))`;
358
+ case "KnownValueName": return `KnownValueName(${JSON.stringify(token.value)})`;
359
+ case "UR": return `UR(Ok(${token.value.string()}))`;
360
+ }
361
+ }
362
+ /**
363
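+ * Renders a byte payload (Rust `Vec<u8>`) as a bracketed, comma-separated list of
364
+ * `0x`-prefixed two-digit lowercase hex bytes, e.g. `[0x68, 0x65, 0x6c, 0x6c, 0x6f]`. NOTE(review): Rust's default `Debug` (`{:?}`) prints `Vec<u8>` elements in decimal (`[104, 101, 108, 108, 111]`), so this form is not byte-identical to Rust — confirm which is intended.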
365
+ */
366
+ function formatBytesDebug(bytes) {
367
+ const parts = [];
368
+ for (const b of bytes) parts.push(`0x${b.toString(16).padStart(2, "0")}`);
369
+ return `[${parts.join(", ")}]`;
370
+ }
371
+ /**
372
+ * Renders a JS `number` the way Rust's `f64::Debug` typically prints
373
+ * it — using a decimal point even for integral values (e.g. `42.0`),
374
+ * and `inf` / `-inf` / `NaN` for non-finite numbers. The dCBOR-parse
375
+ * Rust source rarely produces a `Number` token in error messages
376
+ * (numbers normally land in tagged-content contexts), but we still
377
+ * mirror the convention so any error text is consistent with Rust.
378
+ */
379
+ function formatNumberDebug(n) {
380
+ if (Number.isNaN(n)) return "NaN";
381
+ if (!Number.isFinite(n)) return n > 0 ? "inf" : "-inf";
382
+ if (Number.isInteger(n)) return `${n}.0`;
383
+ return String(n);
384
+ }
385
+ /**
386
+ * Renders a `u64` payload the way Rust's `Debug` does — a bare digit
387
+ * sequence without trailing `n` for `bigint` values. Mirrors
388
+ * `<u64 as Debug>::fmt` and `<TagValue as Debug>::fmt` (TagValue is a
389
+ * type alias for u64 in `bc-ur` / `dcbor`).
390
+ */
391
+ function tagOrKnownValueDebug(value) {
392
+ return typeof value === "bigint" ? value.toString() : String(value);
317
393
  }
318
394
 
319
395
  //#endregion
@@ -487,11 +563,14 @@ var Lexer = class {
487
563
  this._position++;
488
564
  continue;
489
565
  }
490
- if (ch === "/" && this._position + 1 < this._source.length && this._source[this._position + 1] !== "/") {
491
- this._position++;
492
- while (this._position < this._source.length && this._source[this._position] !== "/") this._position++;
493
- if (this._position < this._source.length) this._position++;
494
- continue;
566
+ if (ch === "/") {
567
+ let scan = this._position + 1;
568
+ while (scan < this._source.length && this._source[scan] !== "/") scan++;
569
+ if (scan < this._source.length) {
570
+ this._position = scan + 1;
571
+ continue;
572
+ }
573
+ break;
495
574
  }
496
575
  if (ch === "#") {
497
576
  while (this._position < this._source.length && this._source[this._position] !== "\n") this._position++;
@@ -500,23 +579,33 @@ var Lexer = class {
500
579
  break;
501
580
  }
502
581
  }
582
+ /**
583
+ * Matches reserved keywords: `true`, `false`, `null`, `NaN`,
584
+ * `Infinity`, `-Infinity`, `Unit`.
585
+ *
586
+ * Mirrors Rust's `Logos` `#[token(...)]` matcher
587
+ * (`bc-dcbor-parse-rust/src/token.rs:12-50, 164`), which is greedy
588
+ * and emits the keyword token *as soon as the literal matches* —
589
+ * subsequent characters become a separate (likely unrecognized) token
590
+ * stream. So input like `truex` lexes as `Bool(true)` followed by an
591
+ * unrecognized run on `x`. Earlier revisions of this port enforced an
592
+ * identifier boundary check (`!_isIdentifierChar(nextChar)`) and
593
+ * rejected the whole prefix as a single `UnrecognizedToken`, which
594
+ * broke span/variant parity with Rust.
595
+ */
503
596
  _tryMatchKeyword() {
504
597
  const keywords = [
598
+ ["-Infinity", token.negInfinity()],
505
599
  ["true", token.bool(true)],
506
600
  ["false", token.bool(false)],
507
601
  ["null", token.null()],
508
602
  ["NaN", token.nan()],
509
603
  ["Infinity", token.infinity()],
510
- ["-Infinity", token.negInfinity()],
511
604
  ["Unit", token.unit()]
512
605
  ];
513
606
  for (const [keyword, tok] of keywords) if (this._matchLiteral(keyword)) {
514
- const nextChar = this._source[this._position];
515
- if (nextChar === void 0 || !this._isIdentifierChar(nextChar)) {
516
- this._tokenEnd = this._position;
517
- return ok(tok);
518
- }
519
- this._position = this._tokenStart;
607
+ this._tokenEnd = this._position;
608
+ return ok(tok);
520
609
  }
521
610
  }
522
611
  _tryMatchDateLiteral() {
@@ -545,9 +634,9 @@ var Lexer = class {
545
634
  if (this._source[this._position + numStr.length] === "(" && !numStr.includes(".") && !numStr.includes("e") && !numStr.includes("E") && !numStr.startsWith("-")) {
546
635
  this._position += numStr.length + 1;
547
636
  this._tokenEnd = this._position;
548
- const tagValue = parseInt(numStr, 10);
549
- if (!Number.isSafeInteger(tagValue) || tagValue < 0) return err(parseError.invalidTagValue(numStr, span(this._tokenStart, this._tokenStart + numStr.length)));
550
- return ok(token.tagValue(tagValue));
637
+ const parsed = parseUsize64(numStr);
638
+ if (parsed === void 0) return err(parseError.invalidTagValue(numStr, span(this._tokenStart, this._tokenStart + numStr.length)));
639
+ return ok(token.tagValue(parsed));
551
640
  }
552
641
  this._position += numStr.length;
553
642
  this._tokenEnd = this._position;
@@ -579,15 +668,6 @@ var Lexer = class {
579
668
  return ok(token.string(fullMatch));
580
669
  }
581
670
  this._position++;
582
- while (this._position < this._source.length) {
583
- const ch = this._source[this._position];
584
- if (ch === "\"" || ch === "\n") {
585
- if (ch === "\"") this._position++;
586
- break;
587
- }
588
- if (ch === "\\") this._position += 2;
589
- else this._position++;
590
- }
591
671
  this._tokenEnd = this._position;
592
672
  return err(parseError.unrecognizedToken(this.span()));
593
673
  }
@@ -644,8 +724,8 @@ var Lexer = class {
644
724
  const numStr = match[1];
645
725
  this._position += fullMatch.length;
646
726
  this._tokenEnd = this._position;
647
- const value = parseInt(numStr, 10);
648
- if (!Number.isSafeInteger(value) || value < 0) return err(parseError.invalidKnownValue(numStr, span(this._tokenStart + 1, this._tokenEnd - 1)));
727
+ const value = parseUsize64(numStr);
728
+ if (value === void 0) return err(parseError.invalidKnownValue(numStr, span(this._tokenStart + 1, this._tokenEnd - 1)));
649
729
  return ok(token.knownValueNumber(value));
650
730
  }
651
731
  match = /^'([a-zA-Z_][a-zA-Z0-9_-]*)'/.exec(remaining);
@@ -657,8 +737,6 @@ var Lexer = class {
657
737
  return ok(token.knownValueName(name));
658
738
  }
659
739
  this._position++;
660
- while (this._position < this._source.length && this._source[this._position] !== "'") this._position++;
661
- if (this._position < this._source.length) this._position++;
662
740
  this._tokenEnd = this._position;
663
741
  return err(parseError.unrecognizedToken(this.span()));
664
742
  }
@@ -704,11 +782,39 @@ var Lexer = class {
704
782
  }
705
783
  return false;
706
784
  }
707
- _isIdentifierChar(ch) {
708
- return /[a-zA-Z0-9_-]/.test(ch);
709
- }
710
785
  };
711
786
  /**
787
+ * Strictly parses a non-negative integer string in the range
788
+ * `[0, 2^64 - 1]`, mirroring Rust `<u64 as FromStr>::from_str`.
789
+ *
790
+ * - Empty input or non-digit characters → `undefined`.
791
+ * - Values that fit in `Number.MAX_SAFE_INTEGER` are returned as plain
792
+ * `number`s, so callers in the common case (tag values like `40000`,
793
+ * known values like `1`) never see a `bigint`.
794
+ * - Values in `(2^53-1, 2^64-1]` are returned as `bigint`. dCBOR's
795
+ * `cbor({ tag, value })` and `KnownValue` constructors both accept
796
+ * `bigint` natively, so the bigint flows through to wire encoding
797
+ * without precision loss.
798
+ * - Values strictly greater than `2^64 - 1` (or negative) are rejected
799
+ * so this parser never produces a tag/known-value outside the
800
+ * `u64` domain — matches Rust which fails `parse::<u64>()` in that
801
+ * case.
802
+ */
803
+ const MAX_U64 = (1n << 64n) - 1n;
804
+ function parseUsize64(s) {
805
+ if (s.length === 0) return void 0;
806
+ if (!/^\d+$/.test(s)) return void 0;
807
+ let value;
808
+ try {
809
+ value = BigInt(s);
810
+ } catch {
811
+ return;
812
+ }
813
+ if (value < 0n || value > MAX_U64) return void 0;
814
+ if (value <= BigInt(Number.MAX_SAFE_INTEGER)) return Number(value);
815
+ return value;
816
+ }
817
+ /**
712
818
  * Converts a hex string to bytes.
713
819
  */
714
820
  function hexToBytes(hex) {
@@ -994,12 +1100,12 @@ function parseMap(lexer) {
994
1100
  const keySpan = lexer.span();
995
1101
  if (map.has(key)) return err(parseError.duplicateMapKey(keySpan));
996
1102
  const colonResult = expectToken(lexer);
997
- if (!colonResult.ok) return colonResult;
998
- if (colonResult.value.type !== "Colon") return err(parseError.expectedColon(lexer.span()));
1103
+ if (!colonResult.ok || colonResult.value.type !== "Colon") return err(parseError.expectedColon(lexer.span()));
999
1104
  const valueResult = parseItem(lexer);
1000
1105
  if (!valueResult.ok) {
1001
1106
  if (valueResult.error.type === "UnexpectedToken") {
1002
- if (valueResult.error.token.type === "BraceClose") return err(parseError.expectedMapKey(lexer.span()));
1107
+ const unexpected = valueResult.error;
1108
+ if (unexpected.token.type === "BraceClose") return err(parseError.expectedMapKey(unexpected.span));
1003
1109
  }
1004
1110
  return valueResult;
1005
1111
  }
@@ -1038,12 +1144,20 @@ const composeError = {
1038
1144
  };
1039
1145
  /**
1040
1146
  * Gets the error message for a compose error.
1147
+ *
1148
+ * Mirrors Rust `Error::Display` (`bc-dcbor-parse-rust/src/compose.rs`):
1149
+ * the `ParseError` arm uses `#[error("Invalid CBOR item: {0}")]`, which
1150
+ * formats the inner error via its `Display` impl — *not* the variant
1151
+ * name. So `Error::ParseError(Error::EmptyInput)` formats as
1152
+ * `"Invalid CBOR item: Empty input"`, not
1153
+ * `"Invalid CBOR item: EmptyInput"`. We delegate to {@link errorMessage}
1154
+ * to get the same `Display`-style text.
1041
1155
  */
1042
1156
  function composeErrorMessage(error) {
1043
1157
  switch (error.type) {
1044
1158
  case "OddMapLength": return "Invalid odd map length";
1045
1159
  case "DuplicateMapKey": return "Duplicate map key";
1046
- case "ParseError": return `Invalid CBOR item: ${error.error.type}`;
1160
+ case "ParseError": return `Invalid CBOR item: ${errorMessage(error.error)}`;
1047
1161
  }
1048
1162
  }
1049
1163
  /**