@bcts/dcbor-parse 1.0.0-alpha.16 → 1.0.0-alpha.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +165 -165
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +17 -2
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +17 -2
- package/dist/index.d.mts.map +1 -1
- package/dist/index.iife.js +165 -165
- package/dist/index.iife.js.map +1 -1
- package/dist/index.mjs +165 -165
- package/dist/index.mjs.map +1 -1
- package/package.json +9 -9
- package/src/token.ts +117 -117
package/src/token.ts
CHANGED
|
@@ -123,30 +123,30 @@ export const token = {
|
|
|
123
123
|
* Corresponds to the Rust `logos::Lexer` used in parse.rs
|
|
124
124
|
*/
|
|
125
125
|
export class Lexer {
|
|
126
|
-
readonly
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
126
|
+
private readonly _source: string;
|
|
127
|
+
private _position: number;
|
|
128
|
+
private _tokenStart: number;
|
|
129
|
+
private _tokenEnd: number;
|
|
130
130
|
|
|
131
131
|
constructor(source: string) {
|
|
132
|
-
this
|
|
133
|
-
this
|
|
134
|
-
this
|
|
135
|
-
this
|
|
132
|
+
this._source = source;
|
|
133
|
+
this._position = 0;
|
|
134
|
+
this._tokenStart = 0;
|
|
135
|
+
this._tokenEnd = 0;
|
|
136
136
|
}
|
|
137
137
|
|
|
138
138
|
/**
|
|
139
139
|
* Gets the current span (position range of the last token).
|
|
140
140
|
*/
|
|
141
141
|
span(): Span {
|
|
142
|
-
return span(this
|
|
142
|
+
return span(this._tokenStart, this._tokenEnd);
|
|
143
143
|
}
|
|
144
144
|
|
|
145
145
|
/**
|
|
146
146
|
* Gets the slice of source corresponding to the last token.
|
|
147
147
|
*/
|
|
148
148
|
slice(): string {
|
|
149
|
-
return this
|
|
149
|
+
return this._source.slice(this._tokenStart, this._tokenEnd);
|
|
150
150
|
}
|
|
151
151
|
|
|
152
152
|
/**
|
|
@@ -154,67 +154,67 @@ export class Lexer {
|
|
|
154
154
|
* Returns a Result to handle lexing errors.
|
|
155
155
|
*/
|
|
156
156
|
next(): ParseResult<Token> | undefined {
|
|
157
|
-
this
|
|
157
|
+
this._skipWhitespaceAndComments();
|
|
158
158
|
|
|
159
|
-
if (this
|
|
159
|
+
if (this._position >= this._source.length) {
|
|
160
160
|
return undefined;
|
|
161
161
|
}
|
|
162
162
|
|
|
163
|
-
this
|
|
163
|
+
this._tokenStart = this._position;
|
|
164
164
|
|
|
165
165
|
// Try to match tokens in order of specificity
|
|
166
166
|
const result =
|
|
167
|
-
this
|
|
168
|
-
this
|
|
169
|
-
this
|
|
170
|
-
this
|
|
171
|
-
this
|
|
172
|
-
this
|
|
173
|
-
this
|
|
174
|
-
this
|
|
175
|
-
this
|
|
176
|
-
this
|
|
167
|
+
this._tryMatchKeyword() ??
|
|
168
|
+
this._tryMatchDateLiteral() ??
|
|
169
|
+
this._tryMatchTagValueOrNumber() ??
|
|
170
|
+
this._tryMatchTagName() ??
|
|
171
|
+
this._tryMatchString() ??
|
|
172
|
+
this._tryMatchByteStringHex() ??
|
|
173
|
+
this._tryMatchByteStringBase64() ??
|
|
174
|
+
this._tryMatchKnownValue() ??
|
|
175
|
+
this._tryMatchUR() ??
|
|
176
|
+
this._tryMatchPunctuation();
|
|
177
177
|
|
|
178
178
|
if (result === undefined) {
|
|
179
179
|
// Unrecognized token - consume one character
|
|
180
|
-
this
|
|
181
|
-
this
|
|
180
|
+
this._position++;
|
|
181
|
+
this._tokenEnd = this._position;
|
|
182
182
|
return err(PE.unrecognizedToken(this.span()));
|
|
183
183
|
}
|
|
184
184
|
|
|
185
185
|
return result;
|
|
186
186
|
}
|
|
187
187
|
|
|
188
|
-
|
|
189
|
-
while (this
|
|
190
|
-
const ch = this
|
|
188
|
+
private _skipWhitespaceAndComments(): void {
|
|
189
|
+
while (this._position < this._source.length) {
|
|
190
|
+
const ch = this._source[this._position];
|
|
191
191
|
|
|
192
192
|
// Skip whitespace
|
|
193
193
|
if (ch === " " || ch === "\t" || ch === "\r" || ch === "\n" || ch === "\f") {
|
|
194
|
-
this
|
|
194
|
+
this._position++;
|
|
195
195
|
continue;
|
|
196
196
|
}
|
|
197
197
|
|
|
198
198
|
// Skip inline comments: /.../ (not preceded by another /)
|
|
199
199
|
if (
|
|
200
200
|
ch === "/" &&
|
|
201
|
-
this
|
|
202
|
-
this
|
|
201
|
+
this._position + 1 < this._source.length &&
|
|
202
|
+
this._source[this._position + 1] !== "/"
|
|
203
203
|
) {
|
|
204
|
-
this
|
|
205
|
-
while (this
|
|
206
|
-
this
|
|
204
|
+
this._position++; // Skip opening /
|
|
205
|
+
while (this._position < this._source.length && this._source[this._position] !== "/") {
|
|
206
|
+
this._position++;
|
|
207
207
|
}
|
|
208
|
-
if (this
|
|
209
|
-
this
|
|
208
|
+
if (this._position < this._source.length) {
|
|
209
|
+
this._position++; // Skip closing /
|
|
210
210
|
}
|
|
211
211
|
continue;
|
|
212
212
|
}
|
|
213
213
|
|
|
214
214
|
// Skip end-of-line comments: #...
|
|
215
215
|
if (ch === "#") {
|
|
216
|
-
while (this
|
|
217
|
-
this
|
|
216
|
+
while (this._position < this._source.length && this._source[this._position] !== "\n") {
|
|
217
|
+
this._position++;
|
|
218
218
|
}
|
|
219
219
|
continue;
|
|
220
220
|
}
|
|
@@ -223,7 +223,7 @@ export class Lexer {
|
|
|
223
223
|
}
|
|
224
224
|
}
|
|
225
225
|
|
|
226
|
-
|
|
226
|
+
private _tryMatchKeyword(): ParseResult<Token> | undefined {
|
|
227
227
|
const keywords: [string, Token][] = [
|
|
228
228
|
["true", token.bool(true)],
|
|
229
229
|
["false", token.bool(false)],
|
|
@@ -235,31 +235,31 @@ export class Lexer {
|
|
|
235
235
|
];
|
|
236
236
|
|
|
237
237
|
for (const [keyword, tok] of keywords) {
|
|
238
|
-
if (this
|
|
238
|
+
if (this._matchLiteral(keyword)) {
|
|
239
239
|
// Make sure it's not part of a longer identifier
|
|
240
|
-
const nextChar = this
|
|
241
|
-
if (nextChar === undefined || !this
|
|
242
|
-
this
|
|
240
|
+
const nextChar = this._source[this._position];
|
|
241
|
+
if (nextChar === undefined || !this._isIdentifierChar(nextChar)) {
|
|
242
|
+
this._tokenEnd = this._position;
|
|
243
243
|
return ok(tok);
|
|
244
244
|
}
|
|
245
245
|
// Reset position if it was a partial match
|
|
246
|
-
this
|
|
246
|
+
this._position = this._tokenStart;
|
|
247
247
|
}
|
|
248
248
|
}
|
|
249
249
|
|
|
250
250
|
return undefined;
|
|
251
251
|
}
|
|
252
252
|
|
|
253
|
-
|
|
253
|
+
private _tryMatchDateLiteral(): ParseResult<Token> | undefined {
|
|
254
254
|
// ISO-8601 date: YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS...
|
|
255
255
|
const dateRegex = /^\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})?)?/;
|
|
256
|
-
const remaining = this
|
|
256
|
+
const remaining = this._source.slice(this._position);
|
|
257
257
|
const match = dateRegex.exec(remaining);
|
|
258
258
|
|
|
259
259
|
if (match !== null) {
|
|
260
260
|
const dateStr = match[0];
|
|
261
|
-
this
|
|
262
|
-
this
|
|
261
|
+
this._position += dateStr.length;
|
|
262
|
+
this._tokenEnd = this._position;
|
|
263
263
|
|
|
264
264
|
// Validate date components before parsing to match Rust's strict behavior
|
|
265
265
|
if (!isValidDateString(dateStr)) {
|
|
@@ -277,16 +277,16 @@ export class Lexer {
|
|
|
277
277
|
return undefined;
|
|
278
278
|
}
|
|
279
279
|
|
|
280
|
-
|
|
280
|
+
private _tryMatchTagValueOrNumber(): ParseResult<Token> | undefined {
|
|
281
281
|
// Check for tag value: integer followed by (
|
|
282
282
|
// Or just a number
|
|
283
283
|
const numberRegex = /^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?/;
|
|
284
|
-
const remaining = this
|
|
284
|
+
const remaining = this._source.slice(this._position);
|
|
285
285
|
const match = numberRegex.exec(remaining);
|
|
286
286
|
|
|
287
287
|
if (match !== null) {
|
|
288
288
|
const numStr = match[0];
|
|
289
|
-
const nextChar = this
|
|
289
|
+
const nextChar = this._source[this._position + numStr.length];
|
|
290
290
|
|
|
291
291
|
// Check if this is a tag value (integer followed by parenthesis)
|
|
292
292
|
if (
|
|
@@ -297,13 +297,13 @@ export class Lexer {
|
|
|
297
297
|
!numStr.startsWith("-")
|
|
298
298
|
) {
|
|
299
299
|
// It's a tag value
|
|
300
|
-
this
|
|
301
|
-
this
|
|
300
|
+
this._position += numStr.length + 1; // Include the (
|
|
301
|
+
this._tokenEnd = this._position;
|
|
302
302
|
|
|
303
303
|
const tagValue = parseInt(numStr, 10);
|
|
304
304
|
if (!Number.isSafeInteger(tagValue) || tagValue < 0) {
|
|
305
305
|
return err(
|
|
306
|
-
PE.invalidTagValue(numStr, span(this
|
|
306
|
+
PE.invalidTagValue(numStr, span(this._tokenStart, this._tokenStart + numStr.length)),
|
|
307
307
|
);
|
|
308
308
|
}
|
|
309
309
|
|
|
@@ -311,8 +311,8 @@ export class Lexer {
|
|
|
311
311
|
}
|
|
312
312
|
|
|
313
313
|
// It's a regular number
|
|
314
|
-
this
|
|
315
|
-
this
|
|
314
|
+
this._position += numStr.length;
|
|
315
|
+
this._tokenEnd = this._position;
|
|
316
316
|
|
|
317
317
|
const num = parseFloat(numStr);
|
|
318
318
|
return ok(token.number(num));
|
|
@@ -321,17 +321,17 @@ export class Lexer {
|
|
|
321
321
|
return undefined;
|
|
322
322
|
}
|
|
323
323
|
|
|
324
|
-
|
|
324
|
+
private _tryMatchTagName(): ParseResult<Token> | undefined {
|
|
325
325
|
// Tag name: identifier followed by (
|
|
326
326
|
const tagNameRegex = /^[a-zA-Z_][a-zA-Z0-9_-]*\(/;
|
|
327
|
-
const remaining = this
|
|
327
|
+
const remaining = this._source.slice(this._position);
|
|
328
328
|
const match = tagNameRegex.exec(remaining);
|
|
329
329
|
|
|
330
330
|
if (match !== null) {
|
|
331
331
|
const fullMatch = match[0];
|
|
332
332
|
const name = fullMatch.slice(0, -1); // Remove trailing (
|
|
333
|
-
this
|
|
334
|
-
this
|
|
333
|
+
this._position += fullMatch.length;
|
|
334
|
+
this._tokenEnd = this._position;
|
|
335
335
|
|
|
336
336
|
return ok(token.tagName(name));
|
|
337
337
|
}
|
|
@@ -339,64 +339,64 @@ export class Lexer {
|
|
|
339
339
|
return undefined;
|
|
340
340
|
}
|
|
341
341
|
|
|
342
|
-
|
|
343
|
-
if (this
|
|
342
|
+
private _tryMatchString(): ParseResult<Token> | undefined {
|
|
343
|
+
if (this._source[this._position] !== '"') {
|
|
344
344
|
return undefined;
|
|
345
345
|
}
|
|
346
346
|
|
|
347
347
|
// JavaScript-style string with escape sequences
|
|
348
348
|
// eslint-disable-next-line no-control-regex
|
|
349
349
|
const stringRegex = /^"([^"\\\x00-\x1F]|\\(["\\bnfrt/]|u[a-fA-F0-9]{4}))*"/;
|
|
350
|
-
const remaining = this
|
|
350
|
+
const remaining = this._source.slice(this._position);
|
|
351
351
|
const match = stringRegex.exec(remaining);
|
|
352
352
|
|
|
353
353
|
if (match !== null) {
|
|
354
354
|
const fullMatch = match[0];
|
|
355
|
-
this
|
|
356
|
-
this
|
|
355
|
+
this._position += fullMatch.length;
|
|
356
|
+
this._tokenEnd = this._position;
|
|
357
357
|
|
|
358
358
|
// Return the full string including quotes
|
|
359
359
|
return ok(token.string(fullMatch));
|
|
360
360
|
}
|
|
361
361
|
|
|
362
362
|
// Invalid string - try to find where it ends for better error reporting
|
|
363
|
-
this
|
|
364
|
-
while (this
|
|
365
|
-
const ch = this
|
|
363
|
+
this._position++;
|
|
364
|
+
while (this._position < this._source.length) {
|
|
365
|
+
const ch = this._source[this._position];
|
|
366
366
|
if (ch === '"' || ch === "\n") {
|
|
367
|
-
if (ch === '"') this
|
|
367
|
+
if (ch === '"') this._position++;
|
|
368
368
|
break;
|
|
369
369
|
}
|
|
370
370
|
if (ch === "\\") {
|
|
371
|
-
this
|
|
371
|
+
this._position += 2;
|
|
372
372
|
} else {
|
|
373
|
-
this
|
|
373
|
+
this._position++;
|
|
374
374
|
}
|
|
375
375
|
}
|
|
376
|
-
this
|
|
376
|
+
this._tokenEnd = this._position;
|
|
377
377
|
return err(PE.unrecognizedToken(this.span()));
|
|
378
378
|
}
|
|
379
379
|
|
|
380
|
-
|
|
380
|
+
private _tryMatchByteStringHex(): ParseResult<Token> | undefined {
|
|
381
381
|
// h'...'
|
|
382
|
-
if (!this
|
|
382
|
+
if (!this._matchLiteral("h'")) {
|
|
383
383
|
return undefined;
|
|
384
384
|
}
|
|
385
385
|
|
|
386
386
|
const hexRegex = /^[0-9a-fA-F]*/;
|
|
387
|
-
const remaining = this
|
|
387
|
+
const remaining = this._source.slice(this._position);
|
|
388
388
|
const match = hexRegex.exec(remaining);
|
|
389
389
|
const hexPart = match !== null ? match[0] : "";
|
|
390
390
|
|
|
391
|
-
this
|
|
391
|
+
this._position += hexPart.length;
|
|
392
392
|
|
|
393
|
-
if (this
|
|
394
|
-
this
|
|
393
|
+
if (this._source[this._position] !== "'") {
|
|
394
|
+
this._tokenEnd = this._position;
|
|
395
395
|
return err(PE.invalidHexString(this.span()));
|
|
396
396
|
}
|
|
397
397
|
|
|
398
|
-
this
|
|
399
|
-
this
|
|
398
|
+
this._position++; // Skip closing '
|
|
399
|
+
this._tokenEnd = this._position;
|
|
400
400
|
|
|
401
401
|
// Check that hex string has even length
|
|
402
402
|
if (hexPart.length % 2 !== 0) {
|
|
@@ -408,26 +408,26 @@ export class Lexer {
|
|
|
408
408
|
return ok(token.byteStringHex(bytes));
|
|
409
409
|
}
|
|
410
410
|
|
|
411
|
-
|
|
411
|
+
private _tryMatchByteStringBase64(): ParseResult<Token> | undefined {
|
|
412
412
|
// b64'...'
|
|
413
|
-
if (!this
|
|
413
|
+
if (!this._matchLiteral("b64'")) {
|
|
414
414
|
return undefined;
|
|
415
415
|
}
|
|
416
416
|
|
|
417
417
|
const base64Regex = /^[A-Za-z0-9+/=]*/;
|
|
418
|
-
const remaining = this
|
|
418
|
+
const remaining = this._source.slice(this._position);
|
|
419
419
|
const match = base64Regex.exec(remaining);
|
|
420
420
|
const base64Part = match !== null ? match[0] : "";
|
|
421
421
|
|
|
422
|
-
this
|
|
422
|
+
this._position += base64Part.length;
|
|
423
423
|
|
|
424
|
-
if (this
|
|
425
|
-
this
|
|
424
|
+
if (this._source[this._position] !== "'") {
|
|
425
|
+
this._tokenEnd = this._position;
|
|
426
426
|
return err(PE.invalidBase64String(this.span()));
|
|
427
427
|
}
|
|
428
428
|
|
|
429
|
-
this
|
|
430
|
-
this
|
|
429
|
+
this._position++; // Skip closing '
|
|
430
|
+
this._tokenEnd = this._position;
|
|
431
431
|
|
|
432
432
|
// Check minimum length requirement (2 characters)
|
|
433
433
|
if (base64Part.length < 2) {
|
|
@@ -443,32 +443,32 @@ export class Lexer {
|
|
|
443
443
|
}
|
|
444
444
|
}
|
|
445
445
|
|
|
446
|
-
|
|
447
|
-
if (this
|
|
446
|
+
private _tryMatchKnownValue(): ParseResult<Token> | undefined {
|
|
447
|
+
if (this._source[this._position] !== "'") {
|
|
448
448
|
return undefined;
|
|
449
449
|
}
|
|
450
450
|
|
|
451
451
|
// Check for empty string '' (Unit)
|
|
452
|
-
if (this
|
|
453
|
-
this
|
|
454
|
-
this
|
|
452
|
+
if (this._source[this._position + 1] === "'") {
|
|
453
|
+
this._position += 2;
|
|
454
|
+
this._tokenEnd = this._position;
|
|
455
455
|
return ok(token.knownValueName(""));
|
|
456
456
|
}
|
|
457
457
|
|
|
458
458
|
// Check for numeric known value: '0' or '[1-9][0-9]*'
|
|
459
459
|
const numericRegex = /^'(0|[1-9][0-9]*)'/;
|
|
460
|
-
const remaining = this
|
|
460
|
+
const remaining = this._source.slice(this._position);
|
|
461
461
|
let match = numericRegex.exec(remaining);
|
|
462
462
|
|
|
463
463
|
if (match !== null) {
|
|
464
464
|
const fullMatch = match[0];
|
|
465
465
|
const numStr = match[1];
|
|
466
|
-
this
|
|
467
|
-
this
|
|
466
|
+
this._position += fullMatch.length;
|
|
467
|
+
this._tokenEnd = this._position;
|
|
468
468
|
|
|
469
469
|
const value = parseInt(numStr, 10);
|
|
470
470
|
if (!Number.isSafeInteger(value) || value < 0) {
|
|
471
|
-
return err(PE.invalidKnownValue(numStr, span(this
|
|
471
|
+
return err(PE.invalidKnownValue(numStr, span(this._tokenStart + 1, this._tokenEnd - 1)));
|
|
472
472
|
}
|
|
473
473
|
|
|
474
474
|
return ok(token.knownValueNumber(value));
|
|
@@ -481,34 +481,34 @@ export class Lexer {
|
|
|
481
481
|
if (match !== null) {
|
|
482
482
|
const fullMatch = match[0];
|
|
483
483
|
const name = match[1];
|
|
484
|
-
this
|
|
485
|
-
this
|
|
484
|
+
this._position += fullMatch.length;
|
|
485
|
+
this._tokenEnd = this._position;
|
|
486
486
|
|
|
487
487
|
return ok(token.knownValueName(name));
|
|
488
488
|
}
|
|
489
489
|
|
|
490
490
|
// Invalid known value
|
|
491
|
-
this
|
|
492
|
-
while (this
|
|
493
|
-
this
|
|
491
|
+
this._position++;
|
|
492
|
+
while (this._position < this._source.length && this._source[this._position] !== "'") {
|
|
493
|
+
this._position++;
|
|
494
494
|
}
|
|
495
|
-
if (this
|
|
496
|
-
this
|
|
495
|
+
if (this._position < this._source.length) {
|
|
496
|
+
this._position++;
|
|
497
497
|
}
|
|
498
|
-
this
|
|
498
|
+
this._tokenEnd = this._position;
|
|
499
499
|
return err(PE.unrecognizedToken(this.span()));
|
|
500
500
|
}
|
|
501
501
|
|
|
502
|
-
|
|
502
|
+
private _tryMatchUR(): ParseResult<Token> | undefined {
|
|
503
503
|
// ur:type/data
|
|
504
504
|
const urRegex = /^ur:([a-zA-Z0-9][a-zA-Z0-9-]*)\/([a-zA-Z]{8,})/;
|
|
505
|
-
const remaining = this
|
|
505
|
+
const remaining = this._source.slice(this._position);
|
|
506
506
|
const match = urRegex.exec(remaining);
|
|
507
507
|
|
|
508
508
|
if (match !== null) {
|
|
509
509
|
const fullMatch = match[0];
|
|
510
|
-
this
|
|
511
|
-
this
|
|
510
|
+
this._position += fullMatch.length;
|
|
511
|
+
this._tokenEnd = this._position;
|
|
512
512
|
|
|
513
513
|
try {
|
|
514
514
|
const ur = UR.fromURString(fullMatch);
|
|
@@ -522,8 +522,8 @@ export class Lexer {
|
|
|
522
522
|
return undefined;
|
|
523
523
|
}
|
|
524
524
|
|
|
525
|
-
|
|
526
|
-
const ch = this
|
|
525
|
+
private _tryMatchPunctuation(): ParseResult<Token> | undefined {
|
|
526
|
+
const ch = this._source[this._position];
|
|
527
527
|
|
|
528
528
|
const punctuation: Record<string, Token> = {
|
|
529
529
|
"{": token.braceOpen(),
|
|
@@ -538,23 +538,23 @@ export class Lexer {
|
|
|
538
538
|
|
|
539
539
|
const matched = punctuation[ch];
|
|
540
540
|
if (matched !== undefined) {
|
|
541
|
-
this
|
|
542
|
-
this
|
|
541
|
+
this._position++;
|
|
542
|
+
this._tokenEnd = this._position;
|
|
543
543
|
return ok(matched);
|
|
544
544
|
}
|
|
545
545
|
|
|
546
546
|
return undefined;
|
|
547
547
|
}
|
|
548
548
|
|
|
549
|
-
|
|
550
|
-
if (this
|
|
551
|
-
this
|
|
549
|
+
private _matchLiteral(literal: string): boolean {
|
|
550
|
+
if (this._source.slice(this._position, this._position + literal.length) === literal) {
|
|
551
|
+
this._position += literal.length;
|
|
552
552
|
return true;
|
|
553
553
|
}
|
|
554
554
|
return false;
|
|
555
555
|
}
|
|
556
556
|
|
|
557
|
-
|
|
557
|
+
private _isIdentifierChar(ch: string): boolean {
|
|
558
558
|
return /[a-zA-Z0-9_-]/.test(ch);
|
|
559
559
|
}
|
|
560
560
|
}
|