@bcts/dcbor-parse 1.0.0-alpha.16 → 1.0.0-alpha.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/token.ts CHANGED
@@ -123,30 +123,30 @@ export const token = {
123
123
  * Corresponds to the Rust `logos::Lexer` used in parse.rs
124
124
  */
125
125
  export class Lexer {
126
- readonly #source: string;
127
- #position: number;
128
- #tokenStart: number;
129
- #tokenEnd: number;
126
+ private readonly _source: string;
127
+ private _position: number;
128
+ private _tokenStart: number;
129
+ private _tokenEnd: number;
130
130
 
131
131
  constructor(source: string) {
132
- this.#source = source;
133
- this.#position = 0;
134
- this.#tokenStart = 0;
135
- this.#tokenEnd = 0;
132
+ this._source = source;
133
+ this._position = 0;
134
+ this._tokenStart = 0;
135
+ this._tokenEnd = 0;
136
136
  }
137
137
 
138
138
  /**
139
139
  * Gets the current span (position range of the last token).
140
140
  */
141
141
  span(): Span {
142
- return span(this.#tokenStart, this.#tokenEnd);
142
+ return span(this._tokenStart, this._tokenEnd);
143
143
  }
144
144
 
145
145
  /**
146
146
  * Gets the slice of source corresponding to the last token.
147
147
  */
148
148
  slice(): string {
149
- return this.#source.slice(this.#tokenStart, this.#tokenEnd);
149
+ return this._source.slice(this._tokenStart, this._tokenEnd);
150
150
  }
151
151
 
152
152
  /**
@@ -154,67 +154,67 @@ export class Lexer {
154
154
  * Returns a Result to handle lexing errors.
155
155
  */
156
156
  next(): ParseResult<Token> | undefined {
157
- this.#skipWhitespaceAndComments();
157
+ this._skipWhitespaceAndComments();
158
158
 
159
- if (this.#position >= this.#source.length) {
159
+ if (this._position >= this._source.length) {
160
160
  return undefined;
161
161
  }
162
162
 
163
- this.#tokenStart = this.#position;
163
+ this._tokenStart = this._position;
164
164
 
165
165
  // Try to match tokens in order of specificity
166
166
  const result =
167
- this.#tryMatchKeyword() ??
168
- this.#tryMatchDateLiteral() ??
169
- this.#tryMatchTagValueOrNumber() ??
170
- this.#tryMatchTagName() ??
171
- this.#tryMatchString() ??
172
- this.#tryMatchByteStringHex() ??
173
- this.#tryMatchByteStringBase64() ??
174
- this.#tryMatchKnownValue() ??
175
- this.#tryMatchUR() ??
176
- this.#tryMatchPunctuation();
167
+ this._tryMatchKeyword() ??
168
+ this._tryMatchDateLiteral() ??
169
+ this._tryMatchTagValueOrNumber() ??
170
+ this._tryMatchTagName() ??
171
+ this._tryMatchString() ??
172
+ this._tryMatchByteStringHex() ??
173
+ this._tryMatchByteStringBase64() ??
174
+ this._tryMatchKnownValue() ??
175
+ this._tryMatchUR() ??
176
+ this._tryMatchPunctuation();
177
177
 
178
178
  if (result === undefined) {
179
179
  // Unrecognized token - consume one character
180
- this.#position++;
181
- this.#tokenEnd = this.#position;
180
+ this._position++;
181
+ this._tokenEnd = this._position;
182
182
  return err(PE.unrecognizedToken(this.span()));
183
183
  }
184
184
 
185
185
  return result;
186
186
  }
187
187
 
188
- #skipWhitespaceAndComments(): void {
189
- while (this.#position < this.#source.length) {
190
- const ch = this.#source[this.#position];
188
+ private _skipWhitespaceAndComments(): void {
189
+ while (this._position < this._source.length) {
190
+ const ch = this._source[this._position];
191
191
 
192
192
  // Skip whitespace
193
193
  if (ch === " " || ch === "\t" || ch === "\r" || ch === "\n" || ch === "\f") {
194
- this.#position++;
194
+ this._position++;
195
195
  continue;
196
196
  }
197
197
 
198
198
  // Skip inline comments: /.../ (opening / must not be immediately followed by another /)
199
199
  if (
200
200
  ch === "/" &&
201
- this.#position + 1 < this.#source.length &&
202
- this.#source[this.#position + 1] !== "/"
201
+ this._position + 1 < this._source.length &&
202
+ this._source[this._position + 1] !== "/"
203
203
  ) {
204
- this.#position++; // Skip opening /
205
- while (this.#position < this.#source.length && this.#source[this.#position] !== "/") {
206
- this.#position++;
204
+ this._position++; // Skip opening /
205
+ while (this._position < this._source.length && this._source[this._position] !== "/") {
206
+ this._position++;
207
207
  }
208
- if (this.#position < this.#source.length) {
209
- this.#position++; // Skip closing /
208
+ if (this._position < this._source.length) {
209
+ this._position++; // Skip closing /
210
210
  }
211
211
  continue;
212
212
  }
213
213
 
214
214
  // Skip end-of-line comments: #...
215
215
  if (ch === "#") {
216
- while (this.#position < this.#source.length && this.#source[this.#position] !== "\n") {
217
- this.#position++;
216
+ while (this._position < this._source.length && this._source[this._position] !== "\n") {
217
+ this._position++;
218
218
  }
219
219
  continue;
220
220
  }
@@ -223,7 +223,7 @@ export class Lexer {
223
223
  }
224
224
  }
225
225
 
226
- #tryMatchKeyword(): ParseResult<Token> | undefined {
226
+ private _tryMatchKeyword(): ParseResult<Token> | undefined {
227
227
  const keywords: [string, Token][] = [
228
228
  ["true", token.bool(true)],
229
229
  ["false", token.bool(false)],
@@ -235,31 +235,31 @@ export class Lexer {
235
235
  ];
236
236
 
237
237
  for (const [keyword, tok] of keywords) {
238
- if (this.#matchLiteral(keyword)) {
238
+ if (this._matchLiteral(keyword)) {
239
239
  // Make sure it's not part of a longer identifier
240
- const nextChar = this.#source[this.#position];
241
- if (nextChar === undefined || !this.#isIdentifierChar(nextChar)) {
242
- this.#tokenEnd = this.#position;
240
+ const nextChar = this._source[this._position];
241
+ if (nextChar === undefined || !this._isIdentifierChar(nextChar)) {
242
+ this._tokenEnd = this._position;
243
243
  return ok(tok);
244
244
  }
245
245
  // Reset position if it was a partial match
246
- this.#position = this.#tokenStart;
246
+ this._position = this._tokenStart;
247
247
  }
248
248
  }
249
249
 
250
250
  return undefined;
251
251
  }
252
252
 
253
- #tryMatchDateLiteral(): ParseResult<Token> | undefined {
253
+ private _tryMatchDateLiteral(): ParseResult<Token> | undefined {
254
254
  // ISO-8601 date: YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS...
255
255
  const dateRegex = /^\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})?)?/;
256
- const remaining = this.#source.slice(this.#position);
256
+ const remaining = this._source.slice(this._position);
257
257
  const match = dateRegex.exec(remaining);
258
258
 
259
259
  if (match !== null) {
260
260
  const dateStr = match[0];
261
- this.#position += dateStr.length;
262
- this.#tokenEnd = this.#position;
261
+ this._position += dateStr.length;
262
+ this._tokenEnd = this._position;
263
263
 
264
264
  // Validate date components before parsing to match Rust's strict behavior
265
265
  if (!isValidDateString(dateStr)) {
@@ -277,16 +277,16 @@ export class Lexer {
277
277
  return undefined;
278
278
  }
279
279
 
280
- #tryMatchTagValueOrNumber(): ParseResult<Token> | undefined {
280
+ private _tryMatchTagValueOrNumber(): ParseResult<Token> | undefined {
281
281
  // Check for tag value: integer followed by (
282
282
  // Or just a number
283
283
  const numberRegex = /^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?/;
284
- const remaining = this.#source.slice(this.#position);
284
+ const remaining = this._source.slice(this._position);
285
285
  const match = numberRegex.exec(remaining);
286
286
 
287
287
  if (match !== null) {
288
288
  const numStr = match[0];
289
- const nextChar = this.#source[this.#position + numStr.length];
289
+ const nextChar = this._source[this._position + numStr.length];
290
290
 
291
291
  // Check if this is a tag value (integer followed by parenthesis)
292
292
  if (
@@ -297,13 +297,13 @@ export class Lexer {
297
297
  !numStr.startsWith("-")
298
298
  ) {
299
299
  // It's a tag value
300
- this.#position += numStr.length + 1; // Include the (
301
- this.#tokenEnd = this.#position;
300
+ this._position += numStr.length + 1; // Include the (
301
+ this._tokenEnd = this._position;
302
302
 
303
303
  const tagValue = parseInt(numStr, 10);
304
304
  if (!Number.isSafeInteger(tagValue) || tagValue < 0) {
305
305
  return err(
306
- PE.invalidTagValue(numStr, span(this.#tokenStart, this.#tokenStart + numStr.length)),
306
+ PE.invalidTagValue(numStr, span(this._tokenStart, this._tokenStart + numStr.length)),
307
307
  );
308
308
  }
309
309
 
@@ -311,8 +311,8 @@ export class Lexer {
311
311
  }
312
312
 
313
313
  // It's a regular number
314
- this.#position += numStr.length;
315
- this.#tokenEnd = this.#position;
314
+ this._position += numStr.length;
315
+ this._tokenEnd = this._position;
316
316
 
317
317
  const num = parseFloat(numStr);
318
318
  return ok(token.number(num));
@@ -321,17 +321,17 @@ export class Lexer {
321
321
  return undefined;
322
322
  }
323
323
 
324
- #tryMatchTagName(): ParseResult<Token> | undefined {
324
+ private _tryMatchTagName(): ParseResult<Token> | undefined {
325
325
  // Tag name: identifier followed by (
326
326
  const tagNameRegex = /^[a-zA-Z_][a-zA-Z0-9_-]*\(/;
327
- const remaining = this.#source.slice(this.#position);
327
+ const remaining = this._source.slice(this._position);
328
328
  const match = tagNameRegex.exec(remaining);
329
329
 
330
330
  if (match !== null) {
331
331
  const fullMatch = match[0];
332
332
  const name = fullMatch.slice(0, -1); // Remove trailing (
333
- this.#position += fullMatch.length;
334
- this.#tokenEnd = this.#position;
333
+ this._position += fullMatch.length;
334
+ this._tokenEnd = this._position;
335
335
 
336
336
  return ok(token.tagName(name));
337
337
  }
@@ -339,64 +339,64 @@ export class Lexer {
339
339
  return undefined;
340
340
  }
341
341
 
342
- #tryMatchString(): ParseResult<Token> | undefined {
343
- if (this.#source[this.#position] !== '"') {
342
+ private _tryMatchString(): ParseResult<Token> | undefined {
343
+ if (this._source[this._position] !== '"') {
344
344
  return undefined;
345
345
  }
346
346
 
347
347
  // JavaScript-style string with escape sequences
348
348
  // eslint-disable-next-line no-control-regex
349
349
  const stringRegex = /^"([^"\\\x00-\x1F]|\\(["\\bnfrt/]|u[a-fA-F0-9]{4}))*"/;
350
- const remaining = this.#source.slice(this.#position);
350
+ const remaining = this._source.slice(this._position);
351
351
  const match = stringRegex.exec(remaining);
352
352
 
353
353
  if (match !== null) {
354
354
  const fullMatch = match[0];
355
- this.#position += fullMatch.length;
356
- this.#tokenEnd = this.#position;
355
+ this._position += fullMatch.length;
356
+ this._tokenEnd = this._position;
357
357
 
358
358
  // Return the full string including quotes
359
359
  return ok(token.string(fullMatch));
360
360
  }
361
361
 
362
362
  // Invalid string - try to find where it ends for better error reporting
363
- this.#position++;
364
- while (this.#position < this.#source.length) {
365
- const ch = this.#source[this.#position];
363
+ this._position++;
364
+ while (this._position < this._source.length) {
365
+ const ch = this._source[this._position];
366
366
  if (ch === '"' || ch === "\n") {
367
- if (ch === '"') this.#position++;
367
+ if (ch === '"') this._position++;
368
368
  break;
369
369
  }
370
370
  if (ch === "\\") {
371
- this.#position += 2;
371
+ this._position += 2;
372
372
  } else {
373
- this.#position++;
373
+ this._position++;
374
374
  }
375
375
  }
376
- this.#tokenEnd = this.#position;
376
+ this._tokenEnd = this._position;
377
377
  return err(PE.unrecognizedToken(this.span()));
378
378
  }
379
379
 
380
- #tryMatchByteStringHex(): ParseResult<Token> | undefined {
380
+ private _tryMatchByteStringHex(): ParseResult<Token> | undefined {
381
381
  // h'...'
382
- if (!this.#matchLiteral("h'")) {
382
+ if (!this._matchLiteral("h'")) {
383
383
  return undefined;
384
384
  }
385
385
 
386
386
  const hexRegex = /^[0-9a-fA-F]*/;
387
- const remaining = this.#source.slice(this.#position);
387
+ const remaining = this._source.slice(this._position);
388
388
  const match = hexRegex.exec(remaining);
389
389
  const hexPart = match !== null ? match[0] : "";
390
390
 
391
- this.#position += hexPart.length;
391
+ this._position += hexPart.length;
392
392
 
393
- if (this.#source[this.#position] !== "'") {
394
- this.#tokenEnd = this.#position;
393
+ if (this._source[this._position] !== "'") {
394
+ this._tokenEnd = this._position;
395
395
  return err(PE.invalidHexString(this.span()));
396
396
  }
397
397
 
398
- this.#position++; // Skip closing '
399
- this.#tokenEnd = this.#position;
398
+ this._position++; // Skip closing '
399
+ this._tokenEnd = this._position;
400
400
 
401
401
  // Check that hex string has even length
402
402
  if (hexPart.length % 2 !== 0) {
@@ -408,26 +408,26 @@ export class Lexer {
408
408
  return ok(token.byteStringHex(bytes));
409
409
  }
410
410
 
411
- #tryMatchByteStringBase64(): ParseResult<Token> | undefined {
411
+ private _tryMatchByteStringBase64(): ParseResult<Token> | undefined {
412
412
  // b64'...'
413
- if (!this.#matchLiteral("b64'")) {
413
+ if (!this._matchLiteral("b64'")) {
414
414
  return undefined;
415
415
  }
416
416
 
417
417
  const base64Regex = /^[A-Za-z0-9+/=]*/;
418
- const remaining = this.#source.slice(this.#position);
418
+ const remaining = this._source.slice(this._position);
419
419
  const match = base64Regex.exec(remaining);
420
420
  const base64Part = match !== null ? match[0] : "";
421
421
 
422
- this.#position += base64Part.length;
422
+ this._position += base64Part.length;
423
423
 
424
- if (this.#source[this.#position] !== "'") {
425
- this.#tokenEnd = this.#position;
424
+ if (this._source[this._position] !== "'") {
425
+ this._tokenEnd = this._position;
426
426
  return err(PE.invalidBase64String(this.span()));
427
427
  }
428
428
 
429
- this.#position++; // Skip closing '
430
- this.#tokenEnd = this.#position;
429
+ this._position++; // Skip closing '
430
+ this._tokenEnd = this._position;
431
431
 
432
432
  // Check minimum length requirement (2 characters)
433
433
  if (base64Part.length < 2) {
@@ -443,32 +443,32 @@ export class Lexer {
443
443
  }
444
444
  }
445
445
 
446
- #tryMatchKnownValue(): ParseResult<Token> | undefined {
447
- if (this.#source[this.#position] !== "'") {
446
+ private _tryMatchKnownValue(): ParseResult<Token> | undefined {
447
+ if (this._source[this._position] !== "'") {
448
448
  return undefined;
449
449
  }
450
450
 
451
451
  // Check for empty string '' (Unit)
452
- if (this.#source[this.#position + 1] === "'") {
453
- this.#position += 2;
454
- this.#tokenEnd = this.#position;
452
+ if (this._source[this._position + 1] === "'") {
453
+ this._position += 2;
454
+ this._tokenEnd = this._position;
455
455
  return ok(token.knownValueName(""));
456
456
  }
457
457
 
458
458
  // Check for numeric known value: '0' or '[1-9][0-9]*'
459
459
  const numericRegex = /^'(0|[1-9][0-9]*)'/;
460
- const remaining = this.#source.slice(this.#position);
460
+ const remaining = this._source.slice(this._position);
461
461
  let match = numericRegex.exec(remaining);
462
462
 
463
463
  if (match !== null) {
464
464
  const fullMatch = match[0];
465
465
  const numStr = match[1];
466
- this.#position += fullMatch.length;
467
- this.#tokenEnd = this.#position;
466
+ this._position += fullMatch.length;
467
+ this._tokenEnd = this._position;
468
468
 
469
469
  const value = parseInt(numStr, 10);
470
470
  if (!Number.isSafeInteger(value) || value < 0) {
471
- return err(PE.invalidKnownValue(numStr, span(this.#tokenStart + 1, this.#tokenEnd - 1)));
471
+ return err(PE.invalidKnownValue(numStr, span(this._tokenStart + 1, this._tokenEnd - 1)));
472
472
  }
473
473
 
474
474
  return ok(token.knownValueNumber(value));
@@ -481,34 +481,34 @@ export class Lexer {
481
481
  if (match !== null) {
482
482
  const fullMatch = match[0];
483
483
  const name = match[1];
484
- this.#position += fullMatch.length;
485
- this.#tokenEnd = this.#position;
484
+ this._position += fullMatch.length;
485
+ this._tokenEnd = this._position;
486
486
 
487
487
  return ok(token.knownValueName(name));
488
488
  }
489
489
 
490
490
  // Invalid known value
491
- this.#position++;
492
- while (this.#position < this.#source.length && this.#source[this.#position] !== "'") {
493
- this.#position++;
491
+ this._position++;
492
+ while (this._position < this._source.length && this._source[this._position] !== "'") {
493
+ this._position++;
494
494
  }
495
- if (this.#position < this.#source.length) {
496
- this.#position++;
495
+ if (this._position < this._source.length) {
496
+ this._position++;
497
497
  }
498
- this.#tokenEnd = this.#position;
498
+ this._tokenEnd = this._position;
499
499
  return err(PE.unrecognizedToken(this.span()));
500
500
  }
501
501
 
502
- #tryMatchUR(): ParseResult<Token> | undefined {
502
+ private _tryMatchUR(): ParseResult<Token> | undefined {
503
503
  // ur:type/data
504
504
  const urRegex = /^ur:([a-zA-Z0-9][a-zA-Z0-9-]*)\/([a-zA-Z]{8,})/;
505
- const remaining = this.#source.slice(this.#position);
505
+ const remaining = this._source.slice(this._position);
506
506
  const match = urRegex.exec(remaining);
507
507
 
508
508
  if (match !== null) {
509
509
  const fullMatch = match[0];
510
- this.#position += fullMatch.length;
511
- this.#tokenEnd = this.#position;
510
+ this._position += fullMatch.length;
511
+ this._tokenEnd = this._position;
512
512
 
513
513
  try {
514
514
  const ur = UR.fromURString(fullMatch);
@@ -522,8 +522,8 @@ export class Lexer {
522
522
  return undefined;
523
523
  }
524
524
 
525
- #tryMatchPunctuation(): ParseResult<Token> | undefined {
526
- const ch = this.#source[this.#position];
525
+ private _tryMatchPunctuation(): ParseResult<Token> | undefined {
526
+ const ch = this._source[this._position];
527
527
 
528
528
  const punctuation: Record<string, Token> = {
529
529
  "{": token.braceOpen(),
@@ -538,23 +538,23 @@ export class Lexer {
538
538
 
539
539
  const matched = punctuation[ch];
540
540
  if (matched !== undefined) {
541
- this.#position++;
542
- this.#tokenEnd = this.#position;
541
+ this._position++;
542
+ this._tokenEnd = this._position;
543
543
  return ok(matched);
544
544
  }
545
545
 
546
546
  return undefined;
547
547
  }
548
548
 
549
- #matchLiteral(literal: string): boolean {
550
- if (this.#source.slice(this.#position, this.#position + literal.length) === literal) {
551
- this.#position += literal.length;
549
+ private _matchLiteral(literal: string): boolean {
550
+ if (this._source.slice(this._position, this._position + literal.length) === literal) {
551
+ this._position += literal.length;
552
552
  return true;
553
553
  }
554
554
  return false;
555
555
  }
556
556
 
557
- #isIdentifierChar(ch: string): boolean {
557
+ private _isIdentifierChar(ch: string): boolean {
558
558
  return /[a-zA-Z0-9_-]/.test(ch);
559
559
  }
560
560
  }