@bcts/dcbor-parse 1.0.0-alpha.23 → 1.0.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -223,7 +223,8 @@ function formatMessage(message, source, range) {
223
223
  lineNumber++;
224
224
  lineStart = idx + 1;
225
225
  }
226
- const line = source.split("\n")[lineNumber - 1] ?? "";
226
+ let line = source.split("\n")[lineNumber - 1] ?? "";
227
+ if (line.endsWith("\r")) line = line.slice(0, -1);
227
228
  const column = Math.max(0, start - lineStart);
228
229
  const underlineLen = Math.max(1, end - start);
229
230
  const caret = " ".repeat(column) + "^".repeat(underlineLen);
@@ -311,8 +312,83 @@ function unwrapErr(result) {
311
312
  if (!result.ok) return result.error;
312
313
  throw new Error("Called unwrapErr on an Ok result");
313
314
  }
315
/**
 * Renders a token as a Rust-`Debug`-style string for use in
 * "Unexpected token …" error messages.
 *
 * Output shapes produced by this function:
 *
 * - Payload-free tokens (`BraceOpen`, `Comma`, `Null`, `Unit`, `NaN`, …)
 *   render as the bare variant name.
 * - `Bool`, `Number`, `String`, `TagName` and `KnownValueName` render as
 *   `Variant(payload)`: `Bool(true)`, `Number(3.14)`, `TagName("date")`,
 *   `KnownValueName("isA")`. String payloads are quoted and escaped with
 *   `JSON.stringify`.
 * - `ByteStringHex`, `ByteStringBase64`, `DateLiteral`, `TagValue`,
 *   `KnownValueNumber` and `UR` render with an extra `Ok(...)` wrapper:
 *   `TagValue(Ok(1234))`, `KnownValueNumber(Ok(42))`.
 *   NOTE(review): presumably the matching Rust variants carry a `Result`
 *   payload — confirm against the Rust `token.rs`.
 * - Any other `type` falls back to `String(token.type)` so the result is
 *   always a string and never `undefined`.
 *
 * @param token Token object with a `type` discriminator and, for
 *   payload-carrying variants, a `value`.
 * @returns The debug rendering of the token.
 */
function tokenDebugString(token) {
  switch (token.type) {
    // Payload-free variants: the variant name is the whole rendering.
    case "BraceOpen":
    case "BraceClose":
    case "BracketOpen":
    case "BracketClose":
    case "ParenthesisOpen":
    case "ParenthesisClose":
    case "Colon":
    case "Comma":
    case "Null":
    case "NaN":
    case "Infinity":
    case "NegInfinity":
    case "Unit":
      return token.type;
    case "Bool": return `Bool(${token.value ? "true" : "false"})`;
    case "Number": return `Number(${formatNumberDebug(token.value)})`;
    case "String": return `String(${JSON.stringify(token.value)})`;
    case "TagName": return `TagName(${JSON.stringify(token.value)})`;
    case "KnownValueName": return `KnownValueName(${JSON.stringify(token.value)})`;
    case "ByteStringHex": return `ByteStringHex(Ok(${formatBytesDebug(token.value)}))`;
    case "ByteStringBase64": return `ByteStringBase64(Ok(${formatBytesDebug(token.value)}))`;
    // The date payload is rendered with its own string conversion.
    case "DateLiteral": return `DateLiteral(Ok(${String(token.value)}))`;
    case "TagValue": return `TagValue(Ok(${tagOrKnownValueDebug(token.value)}))`;
    case "KnownValueNumber": return `KnownValueNumber(Ok(${tagOrKnownValueDebug(token.value)}))`;
    case "UR": return `UR(Ok(${token.value.string()}))`;
    // Unknown variant: fall back to the discriminator so callers that
    // interpolate the result never print "undefined".
    default: return String(token.type);
  }
}
361
/**
 * Renders a byte sequence the way Rust's `{:?}` formatter renders a
 * `Vec<u8>`: decimal elements separated by `", "` inside square
 * brackets, e.g. the bytes of "hello" become `[104, 101, 108, 108, 111]`.
 *
 * Plain `Debug` never prints a `0x` prefix, so hexadecimal output would
 * not match Rust's error text.
 *
 * @param bytes Iterable of byte values (e.g. a `Uint8Array`).
 * @returns The bracketed, comma-separated decimal rendering; `[]` for
 *   an empty input.
 */
function formatBytesDebug(bytes) {
  const rendered = Array.from(bytes, (byte) => String(byte));
  return `[${rendered.join(", ")}]`;
}
370
/**
 * Renders a JS `number` the way Rust's `f64` `Debug` formatter prints it.
 *
 * - `NaN` → `NaN`; infinities → `inf` / `-inf`.
 * - Negative zero keeps its sign: `-0.0`.
 * - Magnitudes below `1e-4` (non-zero) or at/above `1e16` use exponent
 *   notation without a `+` sign or forced fraction: `1e21`, `1.5e-7`.
 * - Every other integral value gets a trailing `.0` (`42.0`); other
 *   values use the shortest round-trip decimal (`3.14`).
 *
 * NOTE(review): the exponent thresholds follow Rust's general float
 * `Debug` formatting; confirm against the Rust toolchain the reference
 * implementation is built with.
 *
 * @param n The numeric payload to render.
 * @returns The Rust-style debug text for `n`.
 */
function formatNumberDebug(n) {
  if (Number.isNaN(n)) return "NaN";
  if (n === Infinity) return "inf";
  if (n === -Infinity) return "-inf";
  // `String(-0)` is "0", which would silently drop the sign.
  if (Object.is(n, -0)) return "-0.0";
  const magnitude = Math.abs(n);
  if (magnitude !== 0 && (magnitude < 1e-4 || magnitude >= 1e16)) {
    // JS writes "1e+21"; Rust writes "1e21". Appending ".0" here would
    // produce the malformed "1e+21.0".
    return n.toExponential().replace("e+", "e");
  }
  return Number.isInteger(n) ? `${n}.0` : String(n);
}
384
/**
 * Renders a tag or known-value number as a bare decimal digit string.
 *
 * Accepts either a `number` or a `bigint`; `String()` formats both
 * without any suffix, so a `bigint` such as `42n` renders as `42`.
 *
 * @param value Numeric payload (`number` or `bigint`).
 * @returns The decimal text of `value`.
 */
function tagOrKnownValueDebug(value) {
  return String(value);
}
317
393
 
318
394
  //#endregion
@@ -486,11 +562,14 @@ var Lexer = class {
486
562
  this._position++;
487
563
  continue;
488
564
  }
489
- if (ch === "/" && this._position + 1 < this._source.length && this._source[this._position + 1] !== "/") {
490
- this._position++;
491
- while (this._position < this._source.length && this._source[this._position] !== "/") this._position++;
492
- if (this._position < this._source.length) this._position++;
493
- continue;
565
+ if (ch === "/") {
566
+ let scan = this._position + 1;
567
+ while (scan < this._source.length && this._source[scan] !== "/") scan++;
568
+ if (scan < this._source.length) {
569
+ this._position = scan + 1;
570
+ continue;
571
+ }
572
+ break;
494
573
  }
495
574
  if (ch === "#") {
496
575
  while (this._position < this._source.length && this._source[this._position] !== "\n") this._position++;
@@ -499,23 +578,33 @@ var Lexer = class {
499
578
  break;
500
579
  }
501
580
  }
581
+ /**
582
+ * Matches reserved keywords: `true`, `false`, `null`, `NaN`,
583
+ * `Infinity`, `-Infinity`, `Unit`.
584
+ *
585
+ * Mirrors Rust's `Logos` `#[token(...)]` matcher
586
+ * (`bc-dcbor-parse-rust/src/token.rs:12-50, 164`), which is greedy
587
+ * and emits the keyword token *as soon as the literal matches* —
588
+ * subsequent characters become a separate (likely unrecognized) token
589
+ * stream. So input like `truex` lexes as `Bool(true)` followed by an
590
+ * unrecognized run on `x`. Earlier revisions of this port enforced an
591
+ * identifier boundary check (`!_isIdentifierChar(nextChar)`) and
592
+ * rejected the whole prefix as a single `UnrecognizedToken`, which
593
+ * broke span/variant parity with Rust.
594
+ */
502
595
  _tryMatchKeyword() {
503
596
  const keywords = [
597
+ ["-Infinity", token.negInfinity()],
504
598
  ["true", token.bool(true)],
505
599
  ["false", token.bool(false)],
506
600
  ["null", token.null()],
507
601
  ["NaN", token.nan()],
508
602
  ["Infinity", token.infinity()],
509
- ["-Infinity", token.negInfinity()],
510
603
  ["Unit", token.unit()]
511
604
  ];
512
605
  for (const [keyword, tok] of keywords) if (this._matchLiteral(keyword)) {
513
- const nextChar = this._source[this._position];
514
- if (nextChar === void 0 || !this._isIdentifierChar(nextChar)) {
515
- this._tokenEnd = this._position;
516
- return ok(tok);
517
- }
518
- this._position = this._tokenStart;
606
+ this._tokenEnd = this._position;
607
+ return ok(tok);
519
608
  }
520
609
  }
521
610
  _tryMatchDateLiteral() {
@@ -544,9 +633,9 @@ var Lexer = class {
544
633
  if (this._source[this._position + numStr.length] === "(" && !numStr.includes(".") && !numStr.includes("e") && !numStr.includes("E") && !numStr.startsWith("-")) {
545
634
  this._position += numStr.length + 1;
546
635
  this._tokenEnd = this._position;
547
- const tagValue = parseInt(numStr, 10);
548
- if (!Number.isSafeInteger(tagValue) || tagValue < 0) return err(parseError.invalidTagValue(numStr, span(this._tokenStart, this._tokenStart + numStr.length)));
549
- return ok(token.tagValue(tagValue));
636
+ const parsed = parseUsize64(numStr);
637
+ if (parsed === void 0) return err(parseError.invalidTagValue(numStr, span(this._tokenStart, this._tokenStart + numStr.length)));
638
+ return ok(token.tagValue(parsed));
550
639
  }
551
640
  this._position += numStr.length;
552
641
  this._tokenEnd = this._position;
@@ -578,15 +667,6 @@ var Lexer = class {
578
667
  return ok(token.string(fullMatch));
579
668
  }
580
669
  this._position++;
581
- while (this._position < this._source.length) {
582
- const ch = this._source[this._position];
583
- if (ch === "\"" || ch === "\n") {
584
- if (ch === "\"") this._position++;
585
- break;
586
- }
587
- if (ch === "\\") this._position += 2;
588
- else this._position++;
589
- }
590
670
  this._tokenEnd = this._position;
591
671
  return err(parseError.unrecognizedToken(this.span()));
592
672
  }
@@ -643,8 +723,8 @@ var Lexer = class {
643
723
  const numStr = match[1];
644
724
  this._position += fullMatch.length;
645
725
  this._tokenEnd = this._position;
646
- const value = parseInt(numStr, 10);
647
- if (!Number.isSafeInteger(value) || value < 0) return err(parseError.invalidKnownValue(numStr, span(this._tokenStart + 1, this._tokenEnd - 1)));
726
+ const value = parseUsize64(numStr);
727
+ if (value === void 0) return err(parseError.invalidKnownValue(numStr, span(this._tokenStart + 1, this._tokenEnd - 1)));
648
728
  return ok(token.knownValueNumber(value));
649
729
  }
650
730
  match = /^'([a-zA-Z_][a-zA-Z0-9_-]*)'/.exec(remaining);
@@ -656,8 +736,6 @@ var Lexer = class {
656
736
  return ok(token.knownValueName(name));
657
737
  }
658
738
  this._position++;
659
- while (this._position < this._source.length && this._source[this._position] !== "'") this._position++;
660
- if (this._position < this._source.length) this._position++;
661
739
  this._tokenEnd = this._position;
662
740
  return err(parseError.unrecognizedToken(this.span()));
663
741
  }
@@ -703,11 +781,39 @@ var Lexer = class {
703
781
  }
704
782
  return false;
705
783
  }
706
- _isIdentifierChar(ch) {
707
- return /[a-zA-Z0-9_-]/.test(ch);
708
- }
709
784
  };
710
785
  /**
786
+ * Strictly parses a non-negative integer string in the range
787
+ * `[0, 2^64 - 1]`, mirroring Rust `<u64 as FromStr>::from_str`.
788
+ *
789
+ * - Empty input or non-digit characters → `undefined`.
790
+ * - Values that fit in `Number.MAX_SAFE_INTEGER` are returned as plain
791
+ * `number`s, so callers in the common case (tag values like `40000`,
792
+ * known values like `1`) never see a `bigint`.
793
+ * - Values in `(2^53-1, 2^64-1]` are returned as `bigint`. dCBOR's
794
+ * `cbor({ tag, value })` and `KnownValue` constructors both accept
795
+ * `bigint` natively, so the bigint flows through to wire encoding
796
+ * without precision loss.
797
+ * - Values strictly greater than `2^64 - 1` (or negative) are rejected
798
+ * so this parser never produces a tag/known-value outside the
799
+ * `u64` domain — matches Rust which fails `parse::<u64>()` in that
800
+ * case.
801
+ */
802
+ const MAX_U64 = (1n << 64n) - 1n;
803
+ function parseUsize64(s) {
804
+ if (s.length === 0) return void 0;
805
+ if (!/^\d+$/.test(s)) return void 0;
806
+ let value;
807
+ try {
808
+ value = BigInt(s);
809
+ } catch {
810
+ return;
811
+ }
812
+ if (value < 0n || value > MAX_U64) return void 0;
813
+ if (value <= BigInt(Number.MAX_SAFE_INTEGER)) return Number(value);
814
+ return value;
815
+ }
816
+ /**
711
817
  * Converts a hex string to bytes.
712
818
  */
713
819
  function hexToBytes(hex) {
@@ -993,12 +1099,12 @@ function parseMap(lexer) {
993
1099
  const keySpan = lexer.span();
994
1100
  if (map.has(key)) return err(parseError.duplicateMapKey(keySpan));
995
1101
  const colonResult = expectToken(lexer);
996
- if (!colonResult.ok) return colonResult;
997
- if (colonResult.value.type !== "Colon") return err(parseError.expectedColon(lexer.span()));
1102
+ if (!colonResult.ok || colonResult.value.type !== "Colon") return err(parseError.expectedColon(lexer.span()));
998
1103
  const valueResult = parseItem(lexer);
999
1104
  if (!valueResult.ok) {
1000
1105
  if (valueResult.error.type === "UnexpectedToken") {
1001
- if (valueResult.error.token.type === "BraceClose") return err(parseError.expectedMapKey(lexer.span()));
1106
+ const unexpected = valueResult.error;
1107
+ if (unexpected.token.type === "BraceClose") return err(parseError.expectedMapKey(unexpected.span));
1002
1108
  }
1003
1109
  return valueResult;
1004
1110
  }
@@ -1037,12 +1143,20 @@ const composeError = {
1037
1143
  };
1038
1144
  /**
1039
1145
  * Gets the error message for a compose error.
1146
+ *
1147
+ * Mirrors Rust `Error::Display` (`bc-dcbor-parse-rust/src/compose.rs`):
1148
+ * the `ParseError` arm uses `#[error("Invalid CBOR item: {0}")]`, which
1149
+ * formats the inner error via its `Display` impl — *not* the variant
1150
+ * name. So `Error::ParseError(Error::EmptyInput)` formats as
1151
+ * `"Invalid CBOR item: Empty input"`, not
1152
+ * `"Invalid CBOR item: EmptyInput"`. We delegate to {@link errorMessage}
1153
+ * to get the same `Display`-style text.
1040
1154
  */
1041
1155
  function composeErrorMessage(error) {
1042
1156
  switch (error.type) {
1043
1157
  case "OddMapLength": return "Invalid odd map length";
1044
1158
  case "DuplicateMapKey": return "Duplicate map key";
1045
- case "ParseError": return `Invalid CBOR item: ${error.error.type}`;
1159
+ case "ParseError": return `Invalid CBOR item: ${errorMessage(error.error)}`;
1046
1160
  }
1047
1161
  }
1048
1162
  /**