npm - @bcts/dcbor-parse - Versions diffs - 1.0.0-alpha.23 → 1.0.0-beta.0 - Mend

@bcts/dcbor-parse 1.0.0-alpha.23 → 1.0.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/dist/index.mjs CHANGED Viewed

@@ -223,7 +223,8 @@ function formatMessage(message, source, range) {
 		lineNumber++;
 		lineStart = idx + 1;
 	}
-	const line = source.split("\n")[lineNumber - 1] ?? "";
+	let line = source.split("\n")[lineNumber - 1] ?? "";
+	if (line.endsWith("\r")) line = line.slice(0, -1);
 	const column = Math.max(0, start - lineStart);
 	const underlineLen = Math.max(1, end - start);
 	const caret = " ".repeat(column) + "^".repeat(underlineLen);
@@ -311,8 +312,83 @@ function unwrapErr(result) {
 	if (!result.ok) return result.error;
 	throw new Error("Called unwrapErr on an Ok result");
 }
+/**
+* Renders a {@link Token} the way Rust's
+* `#[derive(Debug)]` on the corresponding enum variant would:
+*
+* - Variant-only tokens (`BraceOpen`, `Comma`, `Null`, `Unit`, `NaN`,
+*   …) print as the bare variant name.
+* - Variant-with-value tokens print as `Variant(value)` where `value`
+*   uses Rust's `Debug` form for the payload type:
+*   `Bool(true)`, `Number(3.14)`, `String("foo")` (with the inner
+*   double quotes preserved — TS keeps them on the slice anyway),
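+*   `TagName("date")`, `KnownValueName("isA")`. Tag values, known-value
+*   numbers, dates, byte strings and URs are printed inside `Ok(…)`:
+*   `TagValue(Ok(1234))`, `KnownValueNumber(Ok(42))`, `DateLiteral(Ok(2023-02-08T15:30:45.000Z))`, etc.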
+*
+* Mirrors Rust's `Error::UnexpectedToken(Box<Token>, Span)` formatter
+* `#[error("Unexpected token {0:?}")]` so error messages stay
+* byte-identical to Rust.
+*/
 function tokenDebugString(token) {
-	return JSON.stringify(token);
+	switch (token.type) {
+		case "Bool": return `Bool(${token.value ? "true" : "false"})`;
+		case "BraceOpen": return "BraceOpen";
+		case "BraceClose": return "BraceClose";
+		case "BracketOpen": return "BracketOpen";
+		case "BracketClose": return "BracketClose";
+		case "ParenthesisOpen": return "ParenthesisOpen";
+		case "ParenthesisClose": return "ParenthesisClose";
+		case "Colon": return "Colon";
+		case "Comma": return "Comma";
+		case "Null": return "Null";
+		case "NaN": return "NaN";
+		case "Infinity": return "Infinity";
+		case "NegInfinity": return "NegInfinity";
+		case "Unit": return "Unit";
+		case "ByteStringHex": return `ByteStringHex(Ok(${formatBytesDebug(token.value)}))`;
+		case "ByteStringBase64": return `ByteStringBase64(Ok(${formatBytesDebug(token.value)}))`;
+		case "DateLiteral": return `DateLiteral(Ok(${String(token.value)}))`;
+		case "Number": return `Number(${formatNumberDebug(token.value)})`;
+		case "String": return `String(${JSON.stringify(token.value)})`;
+		case "TagValue": return `TagValue(Ok(${tagOrKnownValueDebug(token.value)}))`;
+		case "TagName": return `TagName(${JSON.stringify(token.value)})`;
+		case "KnownValueNumber": return `KnownValueNumber(Ok(${tagOrKnownValueDebug(token.value)}))`;
+		case "KnownValueName": return `KnownValueName(${JSON.stringify(token.value)})`;
+		case "UR": return `UR(Ok(${token.value.string()}))`;
+	}
+}
+/**
+* Renders a `Vec<u8>` the way Rust's `Debug` does:
+* `[0x68, 0x65, 0x6c, 0x6c, 0x6f]`.
+*/
+function formatBytesDebug(bytes) {
+	const parts = [];
+	for (const b of bytes) parts.push(`0x${b.toString(16).padStart(2, "0")}`);
+	return `[${parts.join(", ")}]`;
+}
+/**
+* Renders a JS `number` the way Rust's `f64::Debug` typically prints
+* it — a decimal point even for integral values (e.g. `42.0`), and
+* `inf` / `-inf` / `NaN` for non-finite numbers. Caveat: `.0` is appended
+* to JS's own string form, so `-0` prints as `0.0` and integers of
+* magnitude ≥ 1e21 as e.g. `1e+21.0`. Rust rarely puts a `Number` token in
+* error messages (numbers normally land in tagged-content contexts); we mirror it anyway.
+*/
+function formatNumberDebug(n) {
+	if (Number.isNaN(n)) return "NaN";
+	if (!Number.isFinite(n)) return n > 0 ? "inf" : "-inf";
+	if (Number.isInteger(n)) return `${n}.0`;
+	return String(n);
+}
+/**
+* Renders a `u64` payload the way Rust's `Debug` does — a bare digit
+* sequence without trailing `n` for `bigint` values. Mirrors
+* `<u64 as Debug>::fmt` and `<TagValue as Debug>::fmt` (TagValue is a
+* type alias for u64 in `bc-ur` / `dcbor`).
+*/
+function tagOrKnownValueDebug(value) {
+	return typeof value === "bigint" ? value.toString() : String(value);
 }
 //#endregion
@@ -486,11 +562,14 @@ var Lexer = class {
 				this._position++;
 				continue;
 			}
-			if (ch === "/" && this._position + 1 < this._source.length && this._source[this._position + 1] !== "/") {
-				this._position++;
-				while (this._position < this._source.length && this._source[this._position] !== "/") this._position++;
-				if (this._position < this._source.length) this._position++;
-				continue;
+			if (ch === "/") {
+				let scan = this._position + 1;
+				while (scan < this._source.length && this._source[scan] !== "/") scan++;
+				if (scan < this._source.length) {
+					this._position = scan + 1;
+					continue;
+				}
+				break;
 			}
 			if (ch === "#") {
 				while (this._position < this._source.length && this._source[this._position] !== "\n") this._position++;
@@ -499,23 +578,33 @@ var Lexer = class {
 			break;
 		}
 	}
+	/**
+	* Matches reserved keywords: `true`, `false`, `null`, `NaN`,
+	* `Infinity`, `-Infinity`, `Unit`.
+	*
+	* Mirrors Rust's `Logos` `#[token(...)]` matcher
+	* (`bc-dcbor-parse-rust/src/token.rs:12-50, 164`), which is greedy
+	* and emits the keyword token *as soon as the literal matches* —
+	* subsequent characters become a separate (likely unrecognized) token
+	* stream. So input like `truex` lexes as `Bool(true)` followed by an
+	* unrecognized run on `x`. Earlier revisions of this port enforced an
+	* identifier boundary check (`!_isIdentifierChar(nextChar)`) and
+	* rejected the whole prefix as a single `UnrecognizedToken`, which
+	* broke span/variant parity with Rust.
+	*/
 	_tryMatchKeyword() {
 		const keywords = [
+			["-Infinity", token.negInfinity()],
 			["true", token.bool(true)],
 			["false", token.bool(false)],
 			["null", token.null()],
 			["NaN", token.nan()],
 			["Infinity", token.infinity()],
-			["-Infinity", token.negInfinity()],
 			["Unit", token.unit()]
 		];
 		for (const [keyword, tok] of keywords) if (this._matchLiteral(keyword)) {
-			const nextChar = this._source[this._position];
-			if (nextChar === void 0 || !this._isIdentifierChar(nextChar)) {
-				this._tokenEnd = this._position;
-				return ok(tok);
-			}
-			this._position = this._tokenStart;
+			this._tokenEnd = this._position;
+			return ok(tok);
 		}
 	}
 	_tryMatchDateLiteral() {
@@ -544,9 +633,9 @@ var Lexer = class {
 			if (this._source[this._position + numStr.length] === "(" && !numStr.includes(".") && !numStr.includes("e") && !numStr.includes("E") && !numStr.startsWith("-")) {
 				this._position += numStr.length + 1;
 				this._tokenEnd = this._position;
-				const tagValue = parseInt(numStr, 10);
-				if (!Number.isSafeInteger(tagValue) || tagValue < 0) return err(parseError.invalidTagValue(numStr, span(this._tokenStart, this._tokenStart + numStr.length)));
-				return ok(token.tagValue(tagValue));
+				const parsed = parseUsize64(numStr);
+				if (parsed === void 0) return err(parseError.invalidTagValue(numStr, span(this._tokenStart, this._tokenStart + numStr.length)));
+				return ok(token.tagValue(parsed));
 			}
 			this._position += numStr.length;
 			this._tokenEnd = this._position;
@@ -578,15 +667,6 @@ var Lexer = class {
 			return ok(token.string(fullMatch));
 		}
 		this._position++;
-		while (this._position < this._source.length) {
-			const ch = this._source[this._position];
-			if (ch === "\"" || ch === "\n") {
-				if (ch === "\"") this._position++;
-				break;
-			}
-			if (ch === "\\") this._position += 2;
-			else this._position++;
-		}
 		this._tokenEnd = this._position;
 		return err(parseError.unrecognizedToken(this.span()));
 	}
@@ -643,8 +723,8 @@ var Lexer = class {
 			const numStr = match[1];
 			this._position += fullMatch.length;
 			this._tokenEnd = this._position;
-			const value = parseInt(numStr, 10);
-			if (!Number.isSafeInteger(value) || value < 0) return err(parseError.invalidKnownValue(numStr, span(this._tokenStart + 1, this._tokenEnd - 1)));
+			const value = parseUsize64(numStr);
+			if (value === void 0) return err(parseError.invalidKnownValue(numStr, span(this._tokenStart + 1, this._tokenEnd - 1)));
 			return ok(token.knownValueNumber(value));
 		}
 		match = /^'([a-zA-Z_][a-zA-Z0-9_-]*)'/.exec(remaining);
@@ -656,8 +736,6 @@ var Lexer = class {
 			return ok(token.knownValueName(name));
 		}
 		this._position++;
-		while (this._position < this._source.length && this._source[this._position] !== "'") this._position++;
-		if (this._position < this._source.length) this._position++;
 		this._tokenEnd = this._position;
 		return err(parseError.unrecognizedToken(this.span()));
 	}
@@ -703,11 +781,39 @@ var Lexer = class {
 		}
 		return false;
 	}
-	_isIdentifierChar(ch) {
-		return /[a-zA-Z0-9_-]/.test(ch);
-	}
 };
 /**
+* Strictly parses a non-negative integer string in the range
+* `[0, 2^64 - 1]`, mirroring Rust `<u64 as FromStr>::from_str`.
+*
+* - Empty input or non-digit characters → `undefined`.
+* - Values that fit in `Number.MAX_SAFE_INTEGER` are returned as plain
+*   `number`s, so callers in the common case (tag values like `40000`,
+*   known values like `1`) never see a `bigint`.
+* - Values in `(2^53-1, 2^64-1]` are returned as `bigint`. dCBOR's
+*   `cbor({ tag, value })` and `KnownValue` constructors both accept
+*   `bigint` natively, so the bigint flows through to wire encoding
+*   without precision loss.
+* - Values strictly greater than `2^64 - 1` (or negative) are rejected
+*   so this parser never produces a tag/known-value outside the
+*   `u64` domain — matches Rust which fails `parse::<u64>()` in that
+*   case.
+*/
+const MAX_U64 = (1n << 64n) - 1n;
+function parseUsize64(s) {
+	if (s.length === 0) return void 0;
+	if (!/^\d+$/.test(s)) return void 0;
+	let value;
+	try {
+		value = BigInt(s);
+	} catch {
+		return;
+	}
+	if (value < 0n || value > MAX_U64) return void 0;
+	if (value <= BigInt(Number.MAX_SAFE_INTEGER)) return Number(value);
+	return value;
+}
+/**
 * Converts a hex string to bytes.
 */
 function hexToBytes(hex) {
@@ -993,12 +1099,12 @@ function parseMap(lexer) {
 		const keySpan = lexer.span();
 		if (map.has(key)) return err(parseError.duplicateMapKey(keySpan));
 		const colonResult = expectToken(lexer);
-		if (!colonResult.ok) return colonResult;
-		if (colonResult.value.type !== "Colon") return err(parseError.expectedColon(lexer.span()));
+		if (!colonResult.ok || colonResult.value.type !== "Colon") return err(parseError.expectedColon(lexer.span()));
 		const valueResult = parseItem(lexer);
 		if (!valueResult.ok) {
 			if (valueResult.error.type === "UnexpectedToken") {
-				if (valueResult.error.token.type === "BraceClose") return err(parseError.expectedMapKey(lexer.span()));
+				const unexpected = valueResult.error;
+				if (unexpected.token.type === "BraceClose") return err(parseError.expectedMapKey(unexpected.span));
 			}
 			return valueResult;
 		}
@@ -1037,12 +1143,20 @@ const composeError = {
 };
 /**
 * Gets the error message for a compose error.
+*
+* Mirrors Rust `Error::Display` (`bc-dcbor-parse-rust/src/compose.rs`):
+* the `ParseError` arm uses `#[error("Invalid CBOR item: {0}")]`, which
+* formats the inner error via its `Display` impl — *not* the variant
+* name. So `Error::ParseError(Error::EmptyInput)` formats as
+* `"Invalid CBOR item: Empty input"`, not
+* `"Invalid CBOR item: EmptyInput"`. We delegate to {@link errorMessage}
+* to get the same `Display`-style text.
 */
 function composeErrorMessage(error) {
 	switch (error.type) {
 		case "OddMapLength": return "Invalid odd map length";
 		case "DuplicateMapKey": return "Duplicate map key";
-		case "ParseError": return `Invalid CBOR item: ${error.error.type}`;
+		case "ParseError": return `Invalid CBOR item: ${errorMessage(error.error)}`;
 	}
 }
 /**