@bcts/dcbor-parse 1.0.0-alpha.22 → 1.0.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only; it reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +151 -37
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +50 -6
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +50 -6
- package/dist/index.d.mts.map +1 -1
- package/dist/index.iife.js +151 -37
- package/dist/index.iife.js.map +1 -1
- package/dist/index.mjs +151 -37
- package/dist/index.mjs.map +1 -1
- package/package.json +11 -11
- package/src/compose.ts +10 -2
- package/src/error.ts +139 -5
- package/src/index.ts +61 -16
- package/src/parse.ts +29 -11
- package/src/token.ts +127 -54
package/src/index.ts
CHANGED
|
@@ -46,20 +46,72 @@
|
|
|
46
46
|
* @module dcbor-parse
|
|
47
47
|
*/
|
|
48
48
|
|
|
49
|
+
// =============================================================================
|
|
50
|
+
// Public surface that mirrors Rust `bc-dcbor-parse-rust/src/lib.rs:59-72`.
|
|
51
|
+
//
|
|
52
|
+
// Rust re-exports:
|
|
53
|
+
// - `parse_dcbor_item`, `parse_dcbor_item_partial`
|
|
54
|
+
// - `Token`
|
|
55
|
+
// - `Error as ParseError`, `Result as ParseResult`
|
|
56
|
+
// - `Error as ComposeError`, `Result as ComposeResult`,
|
|
57
|
+
// `compose_dcbor_array`, `compose_dcbor_map`
|
|
58
|
+
// =============================================================================
|
|
59
|
+
|
|
49
60
|
// Parse functions
|
|
50
61
|
export { parseDcborItem, parseDcborItemPartial } from "./parse";
|
|
51
62
|
|
|
52
|
-
// Token types
|
|
53
|
-
export { type Token
|
|
63
|
+
// Token types — Rust exposes only the `Token` enum publicly.
|
|
64
|
+
export { type Token } from "./token";
|
|
65
|
+
|
|
66
|
+
// Error types — Rust exposes only `ParseError` (the error enum) and
|
|
67
|
+
// `ParseResult` (the result type alias).
|
|
68
|
+
export { type ParseError, type ParseResult } from "./error";
|
|
54
69
|
|
|
55
|
-
//
|
|
70
|
+
// Compose types and functions — Rust exposes `ComposeError`,
|
|
71
|
+
// `ComposeResult`, and the two `compose_*` functions.
|
|
72
|
+
export {
|
|
73
|
+
type ComposeError,
|
|
74
|
+
type ComposeResult,
|
|
75
|
+
composeDcborArray,
|
|
76
|
+
composeDcborMap,
|
|
77
|
+
} from "./compose";
|
|
78
|
+
|
|
79
|
+
// =============================================================================
|
|
80
|
+
// TypeScript-only conveniences.
|
|
81
|
+
//
|
|
82
|
+
// Rust models its `Result<T, E>` natively via the `Result<T, E>` enum
|
|
83
|
+
// and `?` operator; the Logos lexer is a private implementation detail.
|
|
84
|
+
// In TypeScript we model `ParseResult<T>` as a discriminated union, so
|
|
85
|
+
// helper constructors and discriminators (`ok`, `err`, `isOk`, `isErr`,
|
|
86
|
+
// `unwrap`, `unwrapErr`, `parseError`, `composeError`, `composeOk`,
|
|
87
|
+
// `composeErr`, `Span`, …) are mandatory ergonomics. They are exported
|
|
88
|
+
// here as TS-only helpers and are **not** part of the Rust↔TS parity
|
|
89
|
+
// surface — Rust callers don't see them, and TS callers writing
|
|
90
|
+
// strictly-portable code shouldn't depend on them.
|
|
91
|
+
//
|
|
92
|
+
// `Lexer` and the `token` constructor namespace are likewise TS-only;
|
|
93
|
+
// in Rust the lexer is created via `Token::lexer(src)` internally and
|
|
94
|
+
// consumers never instantiate it directly. These re-exports stay so
|
|
95
|
+
// existing test code keeps working, but production callers should
|
|
96
|
+
// prefer `parseDcborItem` / `parseDcborItemPartial`.
|
|
97
|
+
// =============================================================================
|
|
98
|
+
|
|
99
|
+
// Token — TS-only convenience namespace for constructing tokens from
|
|
100
|
+
// userland (rare; mostly used in tests). The `Lexer` class is also
|
|
101
|
+
// TS-only — Rust treats `Token::lexer(...)` as an internal API.
|
|
102
|
+
export { token, Lexer } from "./token";
|
|
103
|
+
|
|
104
|
+
// Error helpers — `Span`/`span`/`defaultSpan` are TS-only because Rust
|
|
105
|
+
// uses the `logos::Span` type alias directly. The `ok`/`err`/`isOk`/
|
|
106
|
+
// `isErr`/`unwrap`/`unwrapErr` helpers are TS-only `Result`-modeling
|
|
107
|
+
// utilities. `parseError`, `isDefaultError`, `errorMessage`,
|
|
108
|
+
// `errorSpan`, `fullErrorMessage`, and `defaultParseError` are
|
|
109
|
+
// likewise convenience helpers around the discriminated union.
|
|
56
110
|
export {
|
|
57
111
|
type Span,
|
|
58
112
|
span,
|
|
59
113
|
defaultSpan,
|
|
60
|
-
type ParseError,
|
|
61
114
|
parseError,
|
|
62
|
-
type ParseResult,
|
|
63
115
|
ok,
|
|
64
116
|
err,
|
|
65
117
|
isOk,
|
|
@@ -73,14 +125,7 @@ export {
|
|
|
73
125
|
defaultParseError,
|
|
74
126
|
} from "./error";
|
|
75
127
|
|
|
76
|
-
// Compose
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
type ComposeResult,
|
|
81
|
-
composeOk,
|
|
82
|
-
composeErr,
|
|
83
|
-
composeErrorMessage,
|
|
84
|
-
composeDcborArray,
|
|
85
|
-
composeDcborMap,
|
|
86
|
-
} from "./compose";
|
|
128
|
+
// Compose helpers — `composeError`/`composeOk`/`composeErr`/
|
|
129
|
+
// `composeErrorMessage` are the TS-only counterparts of the
|
|
130
|
+
// `ComposeError`/`ComposeResult` discriminated union ergonomics.
|
|
131
|
+
export { composeError, composeOk, composeErr, composeErrorMessage } from "./compose";
|
package/src/parse.ts
CHANGED
|
@@ -252,7 +252,7 @@ function parseUr(ur: UR, tokenSpan: Span): ParseResult<Cbor> {
|
|
|
252
252
|
);
|
|
253
253
|
}
|
|
254
254
|
|
|
255
|
-
function parseNumberTag(tagValue: number, lexer: Lexer): ParseResult<Cbor> {
|
|
255
|
+
function parseNumberTag(tagValue: number | bigint, lexer: Lexer): ParseResult<Cbor> {
|
|
256
256
|
const itemResult = parseItem(lexer);
|
|
257
257
|
if (!itemResult.ok) {
|
|
258
258
|
return itemResult;
|
|
@@ -267,6 +267,10 @@ function parseNumberTag(tagValue: number, lexer: Lexer): ParseResult<Cbor> {
|
|
|
267
267
|
}
|
|
268
268
|
|
|
269
269
|
if (closeResult.value.type === "ParenthesisClose") {
|
|
270
|
+
// Pass the tag value through as-is: when it's a `bigint` (i.e. a
|
|
271
|
+
// u64 outside the safe-integer range), dCBOR's `cbor({ tag, value })`
|
|
272
|
+
// builder serialises it as a `bigint` tag — matching Rust which
|
|
273
|
+
// accepts the full `0..=2^64-1` range natively.
|
|
270
274
|
return ok(cbor({ tag: tagValue, value: itemResult.value }));
|
|
271
275
|
}
|
|
272
276
|
|
|
@@ -386,23 +390,37 @@ function parseMap(lexer: Lexer): ParseResult<Cbor> {
|
|
|
386
390
|
return err(PE.duplicateMapKey(keySpan));
|
|
387
391
|
}
|
|
388
392
|
|
|
389
|
-
// Expect colon
|
|
393
|
+
// Expect colon.
|
|
394
|
+
//
|
|
395
|
+
// Mirrors Rust `parse.rs:382-395`:
|
|
396
|
+
// ```
|
|
397
|
+
// if let Ok(Token::Colon) = expect_token(lexer) { … }
|
|
398
|
+
// else { return Err(Error::ExpectedColon(lexer.span())); }
|
|
399
|
+
// ```
|
|
400
|
+
// Rust's pattern collapses *every* non-Colon outcome — including
|
|
401
|
+
// `UnexpectedEndOfInput`, `UnrecognizedToken`, and any other error
|
|
402
|
+
// — into `ExpectedColon`. Earlier revisions of this port forwarded
|
|
403
|
+
// the inner error verbatim, so `{1` reported `UnexpectedEndOfInput`
|
|
404
|
+
// instead of `ExpectedColon`.
|
|
390
405
|
const colonResult = expectToken(lexer);
|
|
391
|
-
if (!colonResult.ok) {
|
|
392
|
-
return colonResult;
|
|
393
|
-
}
|
|
394
|
-
|
|
395
|
-
if (colonResult.value.type !== "Colon") {
|
|
406
|
+
if (!colonResult.ok || colonResult.value.type !== "Colon") {
|
|
396
407
|
return err(PE.expectedColon(lexer.span()));
|
|
397
408
|
}
|
|
398
409
|
|
|
399
|
-
// Parse the value
|
|
410
|
+
// Parse the value.
|
|
411
|
+
//
|
|
412
|
+
// Rust `parse.rs:383-389` uses the inner `UnexpectedToken`'s **own**
|
|
413
|
+
// span when it converts to `ExpectedMapKey`. Earlier revisions of
|
|
414
|
+
// this port called `lexer.span()` here, which can drift if the
|
|
415
|
+
// lexer has stepped past the offending `}`. We now use the
|
|
416
|
+
// captured span from `valueResult.error` to preserve Rust's exact
|
|
417
|
+
// span semantics.
|
|
400
418
|
const valueResult = parseItem(lexer);
|
|
401
419
|
if (!valueResult.ok) {
|
|
402
420
|
if (valueResult.error.type === "UnexpectedToken") {
|
|
403
|
-
const
|
|
404
|
-
if (
|
|
405
|
-
return err(PE.expectedMapKey(
|
|
421
|
+
const unexpected = valueResult.error;
|
|
422
|
+
if (unexpected.token.type === "BraceClose") {
|
|
423
|
+
return err(PE.expectedMapKey(unexpected.span));
|
|
406
424
|
}
|
|
407
425
|
}
|
|
408
426
|
return valueResult;
|
package/src/token.ts
CHANGED
|
@@ -17,7 +17,21 @@ import { type Span, span, parseError as PE, type ParseResult, ok, err } from "./
|
|
|
17
17
|
/**
|
|
18
18
|
* Token types produced by the lexer.
|
|
19
19
|
*
|
|
20
|
-
* Corresponds to the Rust `Token` enum in token.rs
|
|
20
|
+
* Corresponds to the Rust `Token` enum in token.rs.
|
|
21
|
+
*
|
|
22
|
+
* **u64 parity**: `TagValue` and `KnownValueNumber` are widened to
|
|
23
|
+
* `number | bigint` because Rust accepts the full `u64` range
|
|
24
|
+
* (`0..=2^64-1`). Values that fit in
|
|
25
|
+
* {@link Number.MAX_SAFE_INTEGER} (`2^53-1`) come through as plain
|
|
26
|
+
* `number`s; anything larger arrives as a `bigint` so callers don't
|
|
27
|
+
* silently lose precision. This matches the way `@bcts/dcbor` already
|
|
28
|
+
* stores large unsigned integers (`number | bigint`) and lets the
|
|
29
|
+
* downstream `cbor({ tag, value })` builder serialize correctly.
|
|
30
|
+
*
|
|
31
|
+
* **String value field**: the lexer keeps the outer double quotes on
|
|
32
|
+
* the slice (e.g. `"\"hello\""`); the parser strips them in
|
|
33
|
+
* `parseString`. Mirrors Rust `Token::String(String)` which holds the
|
|
34
|
+
* raw `lex.slice()` including quotes (`token.rs:115-119`).
|
|
21
35
|
*/
|
|
22
36
|
export type Token =
|
|
23
37
|
| { readonly type: "Bool"; readonly value: boolean }
|
|
@@ -38,9 +52,9 @@ export type Token =
|
|
|
38
52
|
| { readonly type: "DateLiteral"; readonly value: CborDate }
|
|
39
53
|
| { readonly type: "Number"; readonly value: number }
|
|
40
54
|
| { readonly type: "String"; readonly value: string }
|
|
41
|
-
| { readonly type: "TagValue"; readonly value: number }
|
|
55
|
+
| { readonly type: "TagValue"; readonly value: number | bigint }
|
|
42
56
|
| { readonly type: "TagName"; readonly value: string }
|
|
43
|
-
| { readonly type: "KnownValueNumber"; readonly value: number }
|
|
57
|
+
| { readonly type: "KnownValueNumber"; readonly value: number | bigint }
|
|
44
58
|
| { readonly type: "KnownValueName"; readonly value: string }
|
|
45
59
|
| { readonly type: "Unit" }
|
|
46
60
|
| { readonly type: "UR"; readonly value: UR };
|
|
@@ -101,13 +115,13 @@ export const token = {
|
|
|
101
115
|
string(value: string): Token {
|
|
102
116
|
return { type: "String", value };
|
|
103
117
|
},
|
|
104
|
-
tagValue(value: number): Token {
|
|
118
|
+
tagValue(value: number | bigint): Token {
|
|
105
119
|
return { type: "TagValue", value };
|
|
106
120
|
},
|
|
107
121
|
tagName(value: string): Token {
|
|
108
122
|
return { type: "TagName", value };
|
|
109
123
|
},
|
|
110
|
-
knownValueNumber(value: number): Token {
|
|
124
|
+
knownValueNumber(value: number | bigint): Token {
|
|
111
125
|
return { type: "KnownValueNumber", value };
|
|
112
126
|
},
|
|
113
127
|
knownValueName(value: string): Token {
|
|
@@ -199,20 +213,29 @@ export class Lexer {
|
|
|
199
213
|
continue;
|
|
200
214
|
}
|
|
201
215
|
|
|
202
|
-
// Skip inline comments:
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
216
|
+
// Skip inline comments: `/[^/]*/` (matches the Rust skip regex
|
|
217
|
+
// `/[^/]*/`). Note that the Rust regex *does* match `//` (zero
|
|
218
|
+
// non-slash characters between the two slashes), so an empty
|
|
219
|
+
// comment is a valid no-op for the lexer. We accept that case too;
|
|
220
|
+
// earlier revisions of this port required at least one non-slash
|
|
221
|
+
// body character, which broke parity with Rust on inputs like
|
|
222
|
+
// `// trailing thought`.
|
|
223
|
+
if (ch === "/") {
|
|
224
|
+
// Confirm there is a closing slash somewhere ahead. If not, fall
|
|
225
|
+
// through and let the punctuation matcher report an
|
|
226
|
+
// unrecognized token (Rust would equally fail to match the skip
|
|
227
|
+
// regex and emit an `UnrecognizedToken`).
|
|
228
|
+
let scan = this._position + 1;
|
|
229
|
+
while (scan < this._source.length && this._source[scan] !== "/") {
|
|
230
|
+
scan++;
|
|
211
231
|
}
|
|
212
|
-
if (
|
|
213
|
-
this._position
|
|
232
|
+
if (scan < this._source.length) {
|
|
233
|
+
this._position = scan + 1; // jump past the closing /
|
|
234
|
+
continue;
|
|
214
235
|
}
|
|
215
|
-
|
|
236
|
+
// No closing /: not a comment — leave _position alone and break
|
|
237
|
+
// out so the punctuation matcher can flag the unrecognized `/`.
|
|
238
|
+
break;
|
|
216
239
|
}
|
|
217
240
|
|
|
218
241
|
// Skip end-of-line comments: #...
|
|
@@ -227,27 +250,38 @@ export class Lexer {
|
|
|
227
250
|
}
|
|
228
251
|
}
|
|
229
252
|
|
|
253
|
+
/**
|
|
254
|
+
* Matches reserved keywords: `true`, `false`, `null`, `NaN`,
|
|
255
|
+
* `Infinity`, `-Infinity`, `Unit`.
|
|
256
|
+
*
|
|
257
|
+
* Mirrors Rust's `Logos` `#[token(...)]` matcher
|
|
258
|
+
* (`bc-dcbor-parse-rust/src/token.rs:12-50, 164`), which is greedy
|
|
259
|
+
* and emits the keyword token *as soon as the literal matches* —
|
|
260
|
+
* subsequent characters become a separate (likely unrecognized) token
|
|
261
|
+
* stream. So input like `truex` lexes as `Bool(true)` followed by an
|
|
262
|
+
* unrecognized run on `x`. Earlier revisions of this port enforced an
|
|
263
|
+
* identifier boundary check (`!_isIdentifierChar(nextChar)`) and
|
|
264
|
+
* rejected the whole prefix as a single `UnrecognizedToken`, which
|
|
265
|
+
* broke span/variant parity with Rust.
|
|
266
|
+
*/
|
|
230
267
|
private _tryMatchKeyword(): ParseResult<Token> | undefined {
|
|
231
268
|
const keywords: [string, Token][] = [
|
|
269
|
+
// Order matters: `-Infinity` must come before any other `-` based
|
|
270
|
+
// matcher (we lex this before numbers, so the `-` doesn't get
|
|
271
|
+
// siphoned off as a sign).
|
|
272
|
+
["-Infinity", token.negInfinity()],
|
|
232
273
|
["true", token.bool(true)],
|
|
233
274
|
["false", token.bool(false)],
|
|
234
275
|
["null", token.null()],
|
|
235
276
|
["NaN", token.nan()],
|
|
236
277
|
["Infinity", token.infinity()],
|
|
237
|
-
["-Infinity", token.negInfinity()],
|
|
238
278
|
["Unit", token.unit()],
|
|
239
279
|
];
|
|
240
280
|
|
|
241
281
|
for (const [keyword, tok] of keywords) {
|
|
242
282
|
if (this._matchLiteral(keyword)) {
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
if (nextChar === undefined || !this._isIdentifierChar(nextChar)) {
|
|
246
|
-
this._tokenEnd = this._position;
|
|
247
|
-
return ok(tok);
|
|
248
|
-
}
|
|
249
|
-
// Reset position if it was a partial match
|
|
250
|
-
this._position = this._tokenStart;
|
|
283
|
+
this._tokenEnd = this._position;
|
|
284
|
+
return ok(tok);
|
|
251
285
|
}
|
|
252
286
|
}
|
|
253
287
|
|
|
@@ -300,18 +334,24 @@ export class Lexer {
|
|
|
300
334
|
!numStr.includes("E") &&
|
|
301
335
|
!numStr.startsWith("-")
|
|
302
336
|
) {
|
|
303
|
-
// It's a tag value
|
|
337
|
+
// It's a tag value. Mirrors Rust `token.rs:128-136`:
|
|
338
|
+
// `stripped.parse::<TagValue>()` accepts the full `u64` range
|
|
339
|
+
// (`0..=2^64-1`). We use `BigInt` to get exact-integer parsing,
|
|
340
|
+
// then narrow to `number` when the value fits in
|
|
341
|
+
// `Number.MAX_SAFE_INTEGER` so callers don't pay the bigint
|
|
342
|
+
// tax for tag numbers in the common range. Anything outside
|
|
343
|
+
// `[0, 2^64-1]` is reported as `InvalidTagValue` matching Rust.
|
|
304
344
|
this._position += numStr.length + 1; // Include the (
|
|
305
345
|
this._tokenEnd = this._position;
|
|
306
346
|
|
|
307
|
-
const
|
|
308
|
-
if (
|
|
347
|
+
const parsed = parseUsize64(numStr);
|
|
348
|
+
if (parsed === undefined) {
|
|
309
349
|
return err(
|
|
310
350
|
PE.invalidTagValue(numStr, span(this._tokenStart, this._tokenStart + numStr.length)),
|
|
311
351
|
);
|
|
312
352
|
}
|
|
313
353
|
|
|
314
|
-
return ok(token.tagValue(
|
|
354
|
+
return ok(token.tagValue(parsed));
|
|
315
355
|
}
|
|
316
356
|
|
|
317
357
|
// It's a regular number
|
|
@@ -363,20 +403,15 @@ export class Lexer {
|
|
|
363
403
|
return ok(token.string(fullMatch));
|
|
364
404
|
}
|
|
365
405
|
|
|
366
|
-
// Invalid string
|
|
406
|
+
// Invalid string: emit an unrecognized token covering just the
|
|
407
|
+
// opening `"` and let the next call to `next()` re-lex. Mirrors
|
|
408
|
+
// Rust's Logos behaviour when the `String` regex fails to match —
|
|
409
|
+
// the lexer emits `Error::default()` (which `expect_token` upgrades
|
|
410
|
+
// to `UnrecognizedToken(span)` for the single character) and
|
|
411
|
+
// recovers at the very next byte. Earlier revisions of this port
|
|
412
|
+
// consumed through the next `"` or `\n`, which inflated the error
|
|
413
|
+
// span beyond what Rust reports.
|
|
367
414
|
this._position++;
|
|
368
|
-
while (this._position < this._source.length) {
|
|
369
|
-
const ch = this._source[this._position];
|
|
370
|
-
if (ch === '"' || ch === "\n") {
|
|
371
|
-
if (ch === '"') this._position++;
|
|
372
|
-
break;
|
|
373
|
-
}
|
|
374
|
-
if (ch === "\\") {
|
|
375
|
-
this._position += 2;
|
|
376
|
-
} else {
|
|
377
|
-
this._position++;
|
|
378
|
-
}
|
|
379
|
-
}
|
|
380
415
|
this._tokenEnd = this._position;
|
|
381
416
|
return err(PE.unrecognizedToken(this.span()));
|
|
382
417
|
}
|
|
@@ -470,8 +505,11 @@ export class Lexer {
|
|
|
470
505
|
this._position += fullMatch.length;
|
|
471
506
|
this._tokenEnd = this._position;
|
|
472
507
|
|
|
473
|
-
|
|
474
|
-
|
|
508
|
+
// Mirrors Rust `token.rs:146-153`: `stripped.parse::<u64>()`
|
|
509
|
+
// accepts the full `u64` range. We share the helper used for
|
|
510
|
+
// `TagValue` to get the same narrow-when-safe-else-bigint path.
|
|
511
|
+
const value = parseUsize64(numStr);
|
|
512
|
+
if (value === undefined) {
|
|
475
513
|
return err(PE.invalidKnownValue(numStr, span(this._tokenStart + 1, this._tokenEnd - 1)));
|
|
476
514
|
}
|
|
477
515
|
|
|
@@ -491,14 +529,14 @@ export class Lexer {
|
|
|
491
529
|
return ok(token.knownValueName(name));
|
|
492
530
|
}
|
|
493
531
|
|
|
494
|
-
// Invalid known value
|
|
532
|
+
// Invalid known value: emit an unrecognized token covering just the
|
|
533
|
+
// opening `'` and let the next call to `next()` re-lex. Mirrors
|
|
534
|
+
// Rust's Logos behaviour when neither `KnownValueNumber` nor
|
|
535
|
+
// `KnownValueName` regex matches — the lexer emits `Error::default()`
|
|
536
|
+
// (single character span) and recovers at the next byte. Earlier
|
|
537
|
+
// revisions of this port consumed through the closing `'`, which
|
|
538
|
+
// inflated the error span beyond what Rust reports.
|
|
495
539
|
this._position++;
|
|
496
|
-
while (this._position < this._source.length && this._source[this._position] !== "'") {
|
|
497
|
-
this._position++;
|
|
498
|
-
}
|
|
499
|
-
if (this._position < this._source.length) {
|
|
500
|
-
this._position++;
|
|
501
|
-
}
|
|
502
540
|
this._tokenEnd = this._position;
|
|
503
541
|
return err(PE.unrecognizedToken(this.span()));
|
|
504
542
|
}
|
|
@@ -557,10 +595,45 @@ export class Lexer {
|
|
|
557
595
|
}
|
|
558
596
|
return false;
|
|
559
597
|
}
|
|
598
|
+
}
|
|
560
599
|
|
|
561
|
-
|
|
562
|
-
|
|
600
|
+
/**
|
|
601
|
+
* Strictly parses a non-negative integer string in the range
|
|
602
|
+
* `[0, 2^64 - 1]`, mirroring Rust `<u64 as FromStr>::from_str`.
|
|
603
|
+
*
|
|
604
|
+
* - Empty input or non-digit characters → `undefined`.
|
|
605
|
+
* - Values that fit in `Number.MAX_SAFE_INTEGER` are returned as plain
|
|
606
|
+
* `number`s, so callers in the common case (tag values like `40000`,
|
|
607
|
+
* known values like `1`) never see a `bigint`.
|
|
608
|
+
* - Values in `(2^53-1, 2^64-1]` are returned as `bigint`. dCBOR's
|
|
609
|
+
* `cbor({ tag, value })` and `KnownValue` constructors both accept
|
|
610
|
+
* `bigint` natively, so the bigint flows through to wire encoding
|
|
611
|
+
* without precision loss.
|
|
612
|
+
* - Values strictly greater than `2^64 - 1` (or negative) are rejected
|
|
613
|
+
* so this parser never produces a tag/known-value outside the
|
|
614
|
+
* `u64` domain — matches Rust which fails `parse::<u64>()` in that
|
|
615
|
+
* case.
|
|
616
|
+
*/
|
|
617
|
+
const MAX_U64: bigint = (1n << 64n) - 1n;
|
|
618
|
+
function parseUsize64(s: string): number | bigint | undefined {
|
|
619
|
+
if (s.length === 0) return undefined;
|
|
620
|
+
// The regex feeding this helper already rejects sign / leading
|
|
621
|
+
// zeros / non-digits; this guard is defensive in case the helper is
|
|
622
|
+
// reused elsewhere.
|
|
623
|
+
if (!/^\d+$/.test(s)) return undefined;
|
|
624
|
+
let value: bigint;
|
|
625
|
+
try {
|
|
626
|
+
value = BigInt(s);
|
|
627
|
+
} catch {
|
|
628
|
+
return undefined;
|
|
629
|
+
}
|
|
630
|
+
if (value < 0n || value > MAX_U64) return undefined;
|
|
631
|
+
// Narrow to plain `number` when safe so common-case callers never
|
|
632
|
+
// see a `bigint`.
|
|
633
|
+
if (value <= BigInt(Number.MAX_SAFE_INTEGER)) {
|
|
634
|
+
return Number(value);
|
|
563
635
|
}
|
|
636
|
+
return value;
|
|
564
637
|
}
|
|
565
638
|
|
|
566
639
|
/**
|