@atomic-ehr/fhirpath 0.0.1-canary.0c6931e.20250727185306

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/README.md +473 -0
  2. package/dist/index.d.ts +462 -0
  3. package/dist/index.js +10307 -0
  4. package/dist/index.js.map +1 -0
  5. package/package.json +58 -0
  6. package/src/analyzer/analyzer.ts +499 -0
  7. package/src/analyzer/model-provider.ts +244 -0
  8. package/src/analyzer/schemas/index.ts +2 -0
  9. package/src/analyzer/schemas/types.ts +40 -0
  10. package/src/analyzer/types.ts +142 -0
  11. package/src/api/builder.ts +157 -0
  12. package/src/api/errors.ts +145 -0
  13. package/src/api/expression.ts +156 -0
  14. package/src/api/index.ts +122 -0
  15. package/src/api/inspect.ts +99 -0
  16. package/src/api/registry.ts +128 -0
  17. package/src/api/types.ts +210 -0
  18. package/src/compiler/compiler.ts +546 -0
  19. package/src/compiler/index.ts +2 -0
  20. package/src/compiler/prototype-context-adapter.ts +99 -0
  21. package/src/compiler/types.ts +24 -0
  22. package/src/index.ts +107 -0
  23. package/src/interpreter/README.md +78 -0
  24. package/src/interpreter/interpreter.ts +475 -0
  25. package/src/interpreter/types.ts +108 -0
  26. package/src/lexer/char-tables.ts +37 -0
  27. package/src/lexer/errors.ts +31 -0
  28. package/src/lexer/index.ts +5 -0
  29. package/src/lexer/lexer.ts +745 -0
  30. package/src/lexer/token.ts +104 -0
  31. package/src/lexer2/index.md +232 -0
  32. package/src/lexer2/index.perf.test.ts +68 -0
  33. package/src/lexer2/index.test.ts +549 -0
  34. package/src/lexer2/index.ts +1251 -0
  35. package/src/lexer2/notes.md +173 -0
  36. package/src/lexer2/optimization-summary.md +718 -0
  37. package/src/parser/ast-factory.ts +220 -0
  38. package/src/parser/ast.ts +144 -0
  39. package/src/parser/collection-parser.ts +89 -0
  40. package/src/parser/diagnostic-messages.ts +216 -0
  41. package/src/parser/diagnostics.ts +85 -0
  42. package/src/parser/error-reporter.ts +230 -0
  43. package/src/parser/index.ts +3 -0
  44. package/src/parser/literal-parser.ts +103 -0
  45. package/src/parser/parse-error.ts +16 -0
  46. package/src/parser/parser-error-factory.ts +141 -0
  47. package/src/parser/parser-state.ts +134 -0
  48. package/src/parser/parser.ts +1272 -0
  49. package/src/parser/pprint.ts +169 -0
  50. package/src/parser/precedence-manager.ts +64 -0
  51. package/src/parser/source-mapper.ts +248 -0
  52. package/src/parser/special-constructs.ts +142 -0
  53. package/src/parser/token-navigator.ts +110 -0
  54. package/src/parser/types.ts +60 -0
  55. package/src/parser2/index.md +177 -0
  56. package/src/parser2/index.perf.test.ts +184 -0
  57. package/src/parser2/index.test.ts +305 -0
  58. package/src/parser2/index.ts +578 -0
  59. package/src/parser2/optimization-summary.md +176 -0
  60. package/src/registry/default-analyzers.ts +257 -0
  61. package/src/registry/default-compilers.ts +31 -0
  62. package/src/registry/index.ts +96 -0
  63. package/src/registry/operations/arithmetic.ts +506 -0
  64. package/src/registry/operations/collection.ts +425 -0
  65. package/src/registry/operations/comparison.ts +432 -0
  66. package/src/registry/operations/existence.ts +703 -0
  67. package/src/registry/operations/filtering.ts +358 -0
  68. package/src/registry/operations/literals.ts +341 -0
  69. package/src/registry/operations/logical.ts +439 -0
  70. package/src/registry/operations/math.ts +128 -0
  71. package/src/registry/operations/membership.ts +132 -0
  72. package/src/registry/operations/navigation.ts +52 -0
  73. package/src/registry/operations/string.ts +507 -0
  74. package/src/registry/operations/subsetting.ts +174 -0
  75. package/src/registry/operations/type-checking.ts +162 -0
  76. package/src/registry/operations/type-conversion.ts +404 -0
  77. package/src/registry/operations/type-operators.ts +308 -0
  78. package/src/registry/operations/utility.ts +644 -0
  79. package/src/registry/registry.ts +146 -0
  80. package/src/registry/types.ts +161 -0
  81. package/src/registry/utils/evaluation-helpers.ts +93 -0
  82. package/src/registry/utils/index.ts +3 -0
  83. package/src/registry/utils/type-system.ts +173 -0
  84. package/src/runtime/context.ts +158 -0
  85. package/src/runtime/debug-context.ts +135 -0
@@ -0,0 +1,1251 @@
1
/**
 * Token kinds emitted by the lexer.
 *
 * Every value is a 16-bit code of the form 0xPPXX. For operators the high
 * byte (PP) holds the precedence and the low byte (XX) tells apart operators
 * that share that precedence. All other tokens have a high byte of zero.
 */
export enum TokenType {
  // ---- Tokens without precedence (high byte = 0) ----

  // Literals
  NULL = 0x0001,
  BOOLEAN = 0x0002,
  STRING = 0x0003,
  NUMBER = 0x0004,
  DATETIME = 0x0005,
  TIME = 0x0006,

  // Identifiers
  IDENTIFIER = 0x0007,
  DELIMITED_IDENTIFIER = 0x0008,

  // Boolean keywords
  TRUE = 0x0009,
  FALSE = 0x000A,

  // Special identifiers ($this, $index, $total)
  THIS = 0x000B,
  INDEX = 0x000C,
  TOTAL = 0x000D,

  // Environment variable (%name)
  ENV_VAR = 0x000E,

  // Date/time units, singular then plural
  YEAR = 0x000F,
  MONTH = 0x0010,
  WEEK = 0x0011,
  DAY = 0x0012,
  HOUR = 0x0013,
  MINUTE = 0x0014,
  SECOND = 0x0015,
  MILLISECOND = 0x0016,
  YEARS = 0x0017,
  MONTHS = 0x0018,
  WEEKS = 0x0019,
  DAYS = 0x001A,
  HOURS = 0x001B,
  MINUTES = 0x001C,
  SECONDS = 0x001D,
  MILLISECONDS = 0x001E,

  // End of input and trivia
  EOF = 0x001F,
  WHITESPACE = 0x0020,
  COMMENT = 0x0021,
  LINE_COMMENT = 0x0022,

  // Punctuation that never acts as an operator
  RPAREN = 0x0023,
  RBRACKET = 0x0024,
  LBRACE = 0x0025,
  RBRACE = 0x0026,
  COMMA = 0x0027,
  PERCENT = 0x0028,
  AT = 0x0029,

  // ---- Operators: (precedence << 8) | ordinal ----

  // Precedence 5
  PIPE = (5 << 8) | 0x01,

  // Precedence 10
  IMPLIES = (10 << 8) | 0x01,

  // Precedence 20
  OR = (20 << 8) | 0x01,
  XOR = (20 << 8) | 0x02,

  // Precedence 30
  AND = (30 << 8) | 0x01,

  // Precedence 35
  IN = (35 << 8) | 0x01,
  CONTAINS = (35 << 8) | 0x02,

  // Precedence 40
  EQ = (40 << 8) | 0x01,
  NEQ = (40 << 8) | 0x02,
  SIMILAR = (40 << 8) | 0x03,
  NOT_SIMILAR = (40 << 8) | 0x04,

  // Precedence 50
  LT = (50 << 8) | 0x01,
  GT = (50 << 8) | 0x02,
  LTE = (50 << 8) | 0x03,
  GTE = (50 << 8) | 0x04,

  // Precedence 60
  AMPERSAND = (60 << 8) | 0x01,

  // Precedence 70
  PLUS = (70 << 8) | 0x01,
  MINUS = (70 << 8) | 0x02,

  // Precedence 80
  MULTIPLY = (80 << 8) | 0x01,
  DIVIDE = (80 << 8) | 0x02,
  DIV = (80 << 8) | 0x03,
  MOD = (80 << 8) | 0x04,

  // Precedence 90
  AS = (90 << 8) | 0x01,
  IS = (90 << 8) | 0x02,

  // Precedence 100
  DOT = (100 << 8) | 0x01,
  LBRACKET = (100 << 8) | 0x02,
  LPAREN = (100 << 8) | 0x03,
}
112
+
113
// Debug names for token types, keyed by the numeric TokenType value.
// Entries are listed in the same order as the TokenType enum.
const TOKEN_TYPE_NAMES: Record<number, string> = {
  // Literals
  [TokenType.NULL]: 'NULL',
  [TokenType.BOOLEAN]: 'BOOLEAN',
  [TokenType.STRING]: 'STRING',
  [TokenType.NUMBER]: 'NUMBER',
  [TokenType.DATETIME]: 'DATETIME',
  [TokenType.TIME]: 'TIME',

  // Identifiers
  [TokenType.IDENTIFIER]: 'IDENTIFIER',
  [TokenType.DELIMITED_IDENTIFIER]: 'DELIMITED_IDENTIFIER',

  // Boolean keywords
  [TokenType.TRUE]: 'TRUE',
  [TokenType.FALSE]: 'FALSE',

  // Special identifiers
  [TokenType.THIS]: 'THIS',
  [TokenType.INDEX]: 'INDEX',
  [TokenType.TOTAL]: 'TOTAL',

  // Environment variable
  [TokenType.ENV_VAR]: 'ENV_VAR',

  // Date/time units
  [TokenType.YEAR]: 'YEAR',
  [TokenType.MONTH]: 'MONTH',
  [TokenType.WEEK]: 'WEEK',
  [TokenType.DAY]: 'DAY',
  [TokenType.HOUR]: 'HOUR',
  [TokenType.MINUTE]: 'MINUTE',
  [TokenType.SECOND]: 'SECOND',
  [TokenType.MILLISECOND]: 'MILLISECOND',
  [TokenType.YEARS]: 'YEARS',
  [TokenType.MONTHS]: 'MONTHS',
  [TokenType.WEEKS]: 'WEEKS',
  [TokenType.DAYS]: 'DAYS',
  [TokenType.HOURS]: 'HOURS',
  [TokenType.MINUTES]: 'MINUTES',
  [TokenType.SECONDS]: 'SECONDS',
  [TokenType.MILLISECONDS]: 'MILLISECONDS',

  // End of input and trivia
  [TokenType.EOF]: 'EOF',
  [TokenType.WHITESPACE]: 'WHITESPACE',
  [TokenType.COMMENT]: 'COMMENT',
  [TokenType.LINE_COMMENT]: 'LINE_COMMENT',

  // Operators, lowest precedence first
  [TokenType.PIPE]: 'PIPE',
  [TokenType.IMPLIES]: 'IMPLIES',
  [TokenType.OR]: 'OR',
  [TokenType.XOR]: 'XOR',
  [TokenType.AND]: 'AND',
  [TokenType.IN]: 'IN',
  [TokenType.CONTAINS]: 'CONTAINS',
  [TokenType.EQ]: 'EQ',
  [TokenType.NEQ]: 'NEQ',
  [TokenType.SIMILAR]: 'SIMILAR',
  [TokenType.NOT_SIMILAR]: 'NOT_SIMILAR',
  [TokenType.LT]: 'LT',
  [TokenType.GT]: 'GT',
  [TokenType.LTE]: 'LTE',
  [TokenType.GTE]: 'GTE',
  [TokenType.AMPERSAND]: 'AMPERSAND',
  [TokenType.PLUS]: 'PLUS',
  [TokenType.MINUS]: 'MINUS',
  [TokenType.MULTIPLY]: 'MULTIPLY',
  [TokenType.DIVIDE]: 'DIVIDE',
  [TokenType.DIV]: 'DIV',
  [TokenType.MOD]: 'MOD',
  [TokenType.AS]: 'AS',
  [TokenType.IS]: 'IS',
  [TokenType.DOT]: 'DOT',
  [TokenType.LBRACKET]: 'LBRACKET',
  [TokenType.LPAREN]: 'LPAREN',

  // Punctuation without precedence
  [TokenType.RPAREN]: 'RPAREN',
  [TokenType.RBRACKET]: 'RBRACKET',
  [TokenType.LBRACE]: 'LBRACE',
  [TokenType.RBRACE]: 'RBRACE',
  [TokenType.COMMA]: 'COMMA',
  [TokenType.PERCENT]: 'PERCENT',
  [TokenType.AT]: 'AT',
};
184
+
185
/**
 * Returns a readable name for a token type, intended for debugging output.
 *
 * @param type - Token type to describe.
 * @returns The name registered in TOKEN_TYPE_NAMES, or `UNKNOWN(<value>)`
 *   when the value has no entry there.
 */
export function tokenTypeToString(type: TokenType): string {
  const knownName = TOKEN_TYPE_NAMES[type];
  if (knownName) {
    return knownName;
  }
  return `UNKNOWN(${type})`;
}
188
+
189
/**
 * Channel a token is tagged with.
 *
 * The lexer marks whitespace and comment tokens as HIDDEN when the
 * `preserveTrivia` option is enabled.
 */
export enum Channel {
  /** Ordinary tokens. */
  REGULAR = 0,
  /** Trivia (whitespace and comments) kept alongside the regular tokens. */
  HIDDEN = 1,
}
193
+
194
/**
 * A lexical token. The token does not carry its text; `start` and `end`
 * are offsets into the lexer input from which the text can be sliced.
 */
export interface Token {
  /** Kind of token. */
  type: TokenType;
  /** Offset of the token's first character in the input. */
  start: number;
  /** Offset just past the token's last character (exclusive). */
  end: number;
  /** Line on which the token starts; the lexer counts lines from 1. */
  line: number;
  /** Column at which the token starts; the lexer counts columns from 1. */
  column: number;
  /** Set to Channel.HIDDEN on trivia tokens when `preserveTrivia` is on. */
  channel?: Channel;
}
202
+
203
/**
 * Options accepted by the Lexer constructor. Every field is optional.
 *
 * NOTE(review): the code that acts on `skipWhitespace` / `skipComments` is
 * outside this excerpt; presumably they drop the corresponding tokens from
 * the output - confirm against the token loop.
 */
export interface LexerOptions {
  /** Defaults to true when omitted. */
  skipWhitespace?: boolean;
  /** Defaults to true when omitted. */
  skipComments?: boolean;
  /**
   * When true, whitespace and comment tokens are tagged with Channel.HIDDEN.
   * Defaults to false when omitted.
   */
  preserveTrivia?: boolean;
}
208
+
209
// Character codes marking the boundaries of the ASCII classes used below.
const CHAR_0 = 48; // '0'
const CHAR_9 = 57; // '9'
const CHAR_A = 65; // 'A'
const CHAR_F = 70; // 'F'
const CHAR_Z = 90; // 'Z'
const CHAR_UNDERSCORE = 95; // '_'
const CHAR_a = 97; // 'a'
const CHAR_f = 102; // 'f'
const CHAR_z = 122; // 'z'

// Classification tables indexed by character code (0..255). An entry is 1
// when the character belongs to the class and 0 otherwise. Indexing with a
// code of 256 or more yields undefined, which callers treat as "not a member".
const IS_DIGIT = new Uint8Array(256);
const IS_LETTER = new Uint8Array(256); // ASCII letters and '_'
const IS_LETTER_OR_DIGIT = new Uint8Array(256);
const IS_HEX_DIGIT = new Uint8Array(256);

// Decimal digits are digits, identifier characters and hex digits.
for (let code = CHAR_0; code <= CHAR_9; code++) {
  IS_DIGIT[code] = 1;
  IS_LETTER_OR_DIGIT[code] = 1;
  IS_HEX_DIGIT[code] = 1;
}

// Upper-case letters.
for (let code = CHAR_A; code <= CHAR_Z; code++) {
  IS_LETTER[code] = 1;
  IS_LETTER_OR_DIGIT[code] = 1;
}

// Lower-case letters.
for (let code = CHAR_a; code <= CHAR_z; code++) {
  IS_LETTER[code] = 1;
  IS_LETTER_OR_DIGIT[code] = 1;
}

// The underscore counts as a letter.
IS_LETTER[CHAR_UNDERSCORE] = 1;
IS_LETTER_OR_DIGIT[CHAR_UNDERSCORE] = 1;

// Hex letters A-F and a-f.
for (let code = CHAR_A; code <= CHAR_F; code++) {
  IS_HEX_DIGIT[code] = 1;
}
for (let code = CHAR_a; code <= CHAR_f; code++) {
  IS_HEX_DIGIT[code] = 1;
}
241
+
242
+ export class Lexer {
243
+ private input: string;
244
+ private position: number = 0;
245
+ private line: number = 1;
246
+ private column: number = 1;
247
+ private options: LexerOptions;
248
+
249
/**
 * Creates a lexer over `input`.
 *
 * Options that are omitted fall back to their defaults: `skipWhitespace`
 * and `skipComments` to true, `preserveTrivia` to false.
 *
 * @param input - Source text to tokenize.
 * @param options - Optional lexer settings.
 */
constructor(input: string, options: LexerOptions = {}) {
  this.input = input;

  const { skipWhitespace, skipComments, preserveTrivia } = options;
  this.options = {
    skipWhitespace: skipWhitespace ?? true,
    skipComments: skipComments ?? true,
    preserveTrivia: preserveTrivia ?? false,
  };
}
257
+
258
/**
 * Looks at the character `offset` places ahead of the cursor without
 * consuming anything.
 *
 * @param offset - Distance from the current position (default 0).
 * @returns The character, or '' when that position is past the end of input.
 */
private peek(offset: number = 0): string {
  const at = this.position + offset;
  return at < this.input.length ? this.input[at] || '' : '';
}
265
+
266
/**
 * Looks at the UTF-16 code unit `offset` places ahead of the cursor without
 * consuming anything.
 *
 * @param offset - Distance from the current position (default 0).
 * @returns The code unit, or -1 when that position is past the end of input.
 */
private peekCharCode(offset: number = 0): number {
  const at = this.position + offset;
  return at >= this.input.length ? -1 : this.input.charCodeAt(at);
}
273
+
274
+
275
/**
 * Reports an unexpected character at the current position.
 *
 * @param char - The offending character, quoted in the message.
 * @throws Error always.
 */
private throwUnexpectedChar(char: string): never {
  const message = `Unexpected character '${char}' at position ${this.position}`;
  throw new Error(message);
}
278
+
279
/**
 * Reports an unexpected character, given as a code unit, at the current
 * position.
 *
 * @param charCode - Code unit of the offending character.
 * @throws Error always.
 */
private throwUnexpectedCharCode(charCode: number): never {
  const offending = String.fromCharCode(charCode);
  throw new Error(`Unexpected character '${offending}' at position ${this.position}`);
}
282
+
283
/**
 * Consumes one character and keeps line/column in step: a '\n' moves to
 * column 1 of the next line, any other character moves one column right.
 *
 * @returns The consumed character, or '' when already at the end of input
 *   (in which case nothing changes).
 */
private advance(): string {
  if (this.position >= this.input.length) {
    return '';
  }

  const consumed = this.input.charAt(this.position);
  this.position += 1;

  if (consumed === '\n') {
    this.line += 1;
    this.column = 1;
  } else {
    this.column += 1;
  }

  return consumed;
}
300
+
301
/**
 * Consumes a run of whitespace (space, tab, CR, LF) starting at the cursor.
 *
 * Space and tab advance the column; LF starts a new line at column 1; CR is
 * consumed without touching the column.
 *
 * @returns A WHITESPACE token covering the run (tagged Channel.HIDDEN when
 *   `preserveTrivia` is on), or null when the cursor is not on whitespace.
 */
private readWhitespace(): Token | null {
  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;
  const source = this.input;

  scan: while (this.position < source.length) {
    switch (source.charCodeAt(this.position)) {
      case 32: // ' '
      case 9: // '\t'
        this.position++;
        this.column++;
        break;
      case 13: // '\r' - column deliberately left alone
        this.position++;
        break;
      case 10: // '\n'
        this.position++;
        this.line++;
        this.column = 1;
        break;
      default:
        // First non-whitespace character ends the run.
        break scan;
    }
  }

  if (this.position === start) {
    return null;
  }

  const token: Token = {
    type: TokenType.WHITESPACE,
    start,
    end: this.position,
    line: startLine,
    column: startColumn,
  };
  if (this.options.preserveTrivia) {
    token.channel = Channel.HIDDEN;
  }
  return token;
}
349
+
350
/**
 * Reads a block comment or a line comment starting at the cursor.
 *
 * A line comment runs up to, but not including, the next '\n' (or the end
 * of input). A block comment runs through its closing delimiter; when the
 * closing delimiter is missing the comment extends to the end of input, so
 * no trailing character is left behind to be lexed as a separate token.
 *
 * @returns A COMMENT or LINE_COMMENT token (tagged Channel.HIDDEN when
 *   `preserveTrivia` is on), or null when the cursor is not on a comment.
 */
private readComment(): Token | null {
  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;

  if (this.peek() !== '/') {
    return null;
  }

  const marker = this.peek(1);
  let type: TokenType;

  if (marker === '*') {
    this.advance(); // /
    this.advance(); // *

    // Scan all the way to the end of input. At the last character peek(1)
    // is '', so the terminator test cannot match past the end.
    while (this.position < this.input.length) {
      if (this.peek() === '*' && this.peek(1) === '/') {
        this.advance(); // *
        this.advance(); // /
        break;
      }
      this.advance();
    }
    type = TokenType.COMMENT;
  } else if (marker === '/') {
    this.advance(); // /
    this.advance(); // /

    while (this.position < this.input.length && this.peek() !== '\n') {
      this.advance();
    }
    type = TokenType.LINE_COMMENT;
  } else {
    return null;
  }

  const token: Token = { type, start, end: this.position, line: startLine, column: startColumn };
  if (this.options.preserveTrivia) {
    token.channel = Channel.HIDDEN;
  }
  return token;
}
392
+
393
/**
 * Reads a string literal delimited by single or double quotes.
 *
 * Accepted escapes are \` \' \" \\ \/ \f \n \r \t and \u followed by exactly
 * four hex digits. A raw newline inside the literal is allowed and updates
 * the line/column counters.
 *
 * @returns A STRING token spanning both quotes, or null when the cursor is
 *   not on a quote character.
 * @throws Error on an invalid escape, an invalid unicode escape, or when the
 *   closing quote is missing.
 */
private readString(): Token | null {
  const src = this.input;
  const len = src.length;
  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;

  if (start >= len) return null;

  const quote = src.charCodeAt(start);
  if (quote !== 39 && quote !== 34) { // ' and "
    return null;
  }

  // Opening quote.
  this.position++;
  this.column++;

  while (this.position < len) {
    const code = src.charCodeAt(this.position);

    if (code === quote) {
      // Closing quote.
      this.position++;
      this.column++;
      return { type: TokenType.STRING, start, end: this.position, line: startLine, column: startColumn };
    }

    if (code === 10) { // raw newline inside the literal
      this.position++;
      this.line++;
      this.column = 1;
      continue;
    }

    // Either an ordinary character or the backslash that opens an escape.
    this.position++;
    this.column++;
    if (code !== 92) { // not a backslash
      continue;
    }

    if (this.position >= len) {
      throw new Error(`Invalid escape sequence at position ${this.position}`);
    }
    const escapedCode = src.charCodeAt(this.position);

    if (escapedCode === 117) { // u
      this.position++;
      this.column++;
      // Exactly four hex digits must follow.
      for (let remaining = 4; remaining > 0; remaining--) {
        if (this.position >= len || !IS_HEX_DIGIT[src.charCodeAt(this.position)]) {
          throw new Error(`Invalid unicode escape at position ${this.position}`);
        }
        this.position++;
        this.column++;
      }
      continue;
    }

    switch (escapedCode) {
      case 96: // `
      case 39: // '
      case 34: // "
      case 92: // \
      case 47: // /
      case 102: // f
      case 110: // n
      case 114: // r
      case 116: // t
        this.position++;
        this.column++;
        break;
      default: {
        const escaped = String.fromCharCode(escapedCode);
        throw new Error(`Invalid escape sequence \\${escaped} at position ${this.position}`);
      }
    }
  }

  throw new Error(`Unterminated string at position ${start}`);
}
481
+
482
/**
 * Reads a backtick-delimited identifier.
 *
 * Inside the delimiters a backslash followed by a backtick or another
 * backslash is consumed as a pair; a backslash before anything else is
 * consumed on its own.
 *
 * @returns A DELIMITED_IDENTIFIER token spanning both backticks, or null
 *   when the cursor is not on a backtick.
 * @throws Error when the closing backtick is missing.
 */
private readDelimitedIdentifier(): Token | null {
  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;

  if (this.peekCharCode() !== 96) { // `
    return null;
  }
  this.advance(); // opening `

  while (this.position < this.input.length) {
    const code = this.peekCharCode();
    this.advance(); // every branch below consumes the current character

    if (code === 96) { // closing `
      return { type: TokenType.DELIMITED_IDENTIFIER, start, end: this.position, line: startLine, column: startColumn };
    }

    if (code === 92) { // \
      const next = this.peekCharCode();
      if (next === 96 || next === 92) { // escaped ` or \
        this.advance();
      }
    }
  }

  throw new Error(`Unterminated delimited identifier at position ${start}`);
}
513
+
514
/**
 * Reads an unsigned number: one or more digits, optionally followed by a
 * '.' and one or more digits. A '.' that is not followed by a digit is left
 * in the input.
 *
 * The column is advanced by the full length of the token, decimal point
 * included, so tokens that follow a decimal number on the same line report
 * the correct column.
 *
 * @returns A NUMBER token, or null when the cursor is not on a digit.
 */
private readNumber(): Token | null {
  const input = this.input;
  const length = input.length;
  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;

  if (start >= length || !IS_DIGIT[input.charCodeAt(start)]) {
    return null;
  }

  // Integer part.
  let pos = start + 1;
  while (pos < length && IS_DIGIT[input.charCodeAt(pos)]) {
    pos++;
  }

  // Fractional part: only when the '.' is immediately followed by a digit.
  if (
    pos + 1 < length &&
    input.charCodeAt(pos) === 46 && // '.'
    IS_DIGIT[input.charCodeAt(pos + 1)]
  ) {
    pos += 2; // '.' and the first fraction digit
    while (pos < length && IS_DIGIT[input.charCodeAt(pos)]) {
      pos++;
    }
  }

  // A number never contains a newline, so the column moves by the token length.
  this.column += pos - start;
  this.position = pos;

  return { type: TokenType.NUMBER, start, end: pos, line: startLine, column: startColumn };
}
560
+
561
/**
 * Reads a date/time literal introduced by '@'.
 *
 * Two forms are recognised:
 *  - '@T' followed by a time (see readTimeFormat) produces a TIME token;
 *  - '@YYYY', optionally followed by '-MM', '-DD', 'T', a time and a
 *    timezone, produces a DATETIME token.
 *
 * A timezone is 'Z', or a sign followed by two digits and optionally ':' and
 * two more digits. The sign and the ':' are consumed only when the digits
 * that must follow them are present, so an arithmetic '+' or '-' placed
 * directly after a date stays in the input as its own token.
 *
 * @returns The token, or null (with position, line and column restored)
 *   when the text after '@' is not a date/time literal.
 */
private readDateTime(): Token | null {
  if (this.peek() !== '@') {
    return null;
  }

  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;

  // Puts the cursor back on the '@' and signals "no token".
  const rewind = (): null => {
    this.position = start;
    this.line = startLine;
    this.column = startColumn;
    return null;
  };

  // True when the two characters at `offset` and `offset + 1` are digits.
  const twoDigitsAt = (offset: number): boolean => {
    const first = this.peekCharCode(offset);
    const second = this.peekCharCode(offset + 1);
    return first !== -1 && !!IS_DIGIT[first] && second !== -1 && !!IS_DIGIT[second];
  };

  this.advance(); // @

  // Time-only literal: @Thh[:mm[:ss[.fff]]]
  if (this.peek() === 'T') {
    this.advance(); // T
    if (this.readTimeFormat()) {
      return { type: TokenType.TIME, start, end: this.position, line: startLine, column: startColumn };
    }
    return rewind();
  }

  // Year: exactly four digits are required.
  for (let i = 0; i < 4; i++) {
    const charCode = this.peekCharCode();
    if (charCode === -1 || !IS_DIGIT[charCode]) {
      return rewind();
    }
    this.advance();
  }

  if (this.peek() === '-') {
    // Month: a '-' after the year must be followed by two digits.
    this.advance();
    if (!twoDigitsAt(0)) {
      return rewind();
    }
    this.advance();
    this.advance();

    if (this.peek() === '-') {
      // Day: same rule as the month.
      this.advance();
      if (!twoDigitsAt(0)) {
        return rewind();
      }
      this.advance();
      this.advance();

      // Optional time; a bare 'T' with no time after it is accepted.
      if (this.peek() === 'T') {
        this.advance();
        this.readTimeFormat();
      }
    } else if (this.peek() === 'T') {
      this.advance();
    }
  } else if (this.peek() === 'T') {
    this.advance();
  }

  // Optional timezone.
  if (this.peek() === 'Z') {
    this.advance();
  } else if ((this.peek() === '+' || this.peek() === '-') && twoDigitsAt(1)) {
    this.advance(); // sign
    this.advance(); // offset hours
    this.advance();
    if (this.peek() === ':' && twoDigitsAt(1)) {
      this.advance(); // :
      this.advance(); // offset minutes
      this.advance();
    }
  }

  return { type: TokenType.DATETIME, start, end: this.position, line: startLine, column: startColumn };
}
671
+
672
/**
 * Consumes a time of the form hh[:mm[:ss[.f+]]] at the cursor.
 *
 * Each optional component is consumed only when it is complete: a ':' must
 * be followed by two digits and a '.' by at least one digit. A separator
 * that does not introduce a valid component is left in the input, so for
 * example a '.' that starts a member access after the seconds is not
 * swallowed into the literal.
 *
 * @returns false, with nothing consumed, when the cursor is not on two
 *   digits; true otherwise, with the cursor just past the longest valid
 *   time.
 */
private readTimeFormat(): boolean {
  // True when the two characters at `offset` and `offset + 1` are digits.
  const twoDigitsAt = (offset: number): boolean => {
    const first = this.peekCharCode(offset);
    const second = this.peekCharCode(offset + 1);
    return first !== -1 && !!IS_DIGIT[first] && second !== -1 && !!IS_DIGIT[second];
  };

  // hh (required)
  if (!twoDigitsAt(0)) {
    return false;
  }
  this.advance();
  this.advance();

  // :mm (optional)
  if (this.peek() !== ':' || !twoDigitsAt(1)) {
    return true;
  }
  this.advance(); // :
  this.advance();
  this.advance();

  // :ss (optional)
  if (this.peek() !== ':' || !twoDigitsAt(1)) {
    return true;
  }
  this.advance(); // :
  this.advance();
  this.advance();

  // .fraction (optional, one or more digits)
  const firstFractionDigit = this.peekCharCode(1);
  if (this.peek() === '.' && firstFractionDigit !== -1 && IS_DIGIT[firstFractionDigit]) {
    this.advance(); // .
    while (this.position < this.input.length) {
      const charCode = this.peekCharCode();
      if (charCode === -1 || !IS_DIGIT[charCode]) {
        break;
      }
      this.advance();
    }
  }

  return true;
}
728
+
729
/**
 * Reads an identifier (a letter or '_' followed by letters, digits or '_')
 * and classifies it as a keyword token when its text is one of the reserved
 * words; otherwise the token type is IDENTIFIER.
 *
 * Keywords are matched case-sensitively and in place: candidates are chosen
 * by the identifier's length and compared against the input directly, so no
 * substring is allocated.
 *
 * @returns The token, or null when the cursor is not on a letter or '_'.
 */
private readIdentifierOrKeyword(): Token | null {
  const text = this.input;
  const limit = text.length;
  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;

  if (start >= limit || !IS_LETTER[text.charCodeAt(start)]) {
    return null;
  }

  // Find the end of the identifier, then move the cursor in one step.
  let stop = start + 1;
  while (stop < limit && IS_LETTER_OR_DIGIT[text.charCodeAt(stop)]) {
    stop++;
  }
  this.column += stop - start;
  this.position = stop;

  // The identifier's length already equals the candidate keyword's length,
  // so a prefix match at `start` is an exact match.
  let type: TokenType = TokenType.IDENTIFIER;
  switch (stop - start) {
    case 2:
      if (text.startsWith('as', start)) type = TokenType.AS;
      else if (text.startsWith('in', start)) type = TokenType.IN;
      else if (text.startsWith('is', start)) type = TokenType.IS;
      else if (text.startsWith('or', start)) type = TokenType.OR;
      break;
    case 3:
      if (text.startsWith('div', start)) type = TokenType.DIV;
      else if (text.startsWith('mod', start)) type = TokenType.MOD;
      else if (text.startsWith('and', start)) type = TokenType.AND;
      else if (text.startsWith('xor', start)) type = TokenType.XOR;
      else if (text.startsWith('day', start)) type = TokenType.DAY;
      break;
    case 4:
      if (text.startsWith('true', start)) type = TokenType.TRUE;
      else if (text.startsWith('year', start)) type = TokenType.YEAR;
      else if (text.startsWith('week', start)) type = TokenType.WEEK;
      else if (text.startsWith('hour', start)) type = TokenType.HOUR;
      else if (text.startsWith('days', start)) type = TokenType.DAYS;
      break;
    case 5:
      if (text.startsWith('false', start)) type = TokenType.FALSE;
      else if (text.startsWith('month', start)) type = TokenType.MONTH;
      else if (text.startsWith('weeks', start)) type = TokenType.WEEKS;
      else if (text.startsWith('years', start)) type = TokenType.YEARS;
      else if (text.startsWith('hours', start)) type = TokenType.HOURS;
      break;
    case 6:
      if (text.startsWith('minute', start)) type = TokenType.MINUTE;
      else if (text.startsWith('second', start)) type = TokenType.SECOND;
      else if (text.startsWith('months', start)) type = TokenType.MONTHS;
      break;
    case 7:
      if (text.startsWith('implies', start)) type = TokenType.IMPLIES;
      else if (text.startsWith('minutes', start)) type = TokenType.MINUTES;
      else if (text.startsWith('seconds', start)) type = TokenType.SECONDS;
      break;
    case 8:
      if (text.startsWith('contains', start)) type = TokenType.CONTAINS;
      break;
    case 11:
      if (text.startsWith('millisecond', start)) type = TokenType.MILLISECOND;
      break;
    case 12:
      if (text.startsWith('milliseconds', start)) type = TokenType.MILLISECONDS;
      break;
    default:
      break;
  }

  return { type, start, end: stop, line: startLine, column: startColumn };
}
869
+
870
/**
 * Reads one of the special identifiers `$this`, `$index` or `$total`.
 *
 * Only the literal text is matched; whatever follows it is not inspected.
 *
 * @returns A THIS, INDEX or TOTAL token, or null when the cursor is not on
 *   '$' or the '$' is not followed by one of the three names.
 */
private readSpecialIdentifier(): Token | null {
  const text = this.input;
  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;

  if (start >= text.length || text.charCodeAt(start) !== 36) { // $
    return null;
  }

  let type: TokenType;
  let width: number; // token length including the '$'
  if (text.startsWith('this', start + 1)) {
    type = TokenType.THIS;
    width = 5;
  } else if (text.startsWith('index', start + 1)) {
    type = TokenType.INDEX;
    width = 6;
  } else if (text.startsWith('total', start + 1)) {
    type = TokenType.TOTAL;
    width = 6;
  } else {
    return null;
  }

  this.position += width;
  this.column += width;
  return { type, start, end: this.position, line: startLine, column: startColumn };
}
918
+
919
/**
 * Tries to read an environment variable starting at the current position.
 *
 * Three spellings are recognised after the leading `%`:
 *  - `%'...'`  quoted form; backslash escapes `\'`, `\\`, `\/`, `\f`, `\n`,
 *              `\r`, `\t` and `\uXXXX` (four hex digits) are validated;
 *  - %`...`    delimited form; a backslash followed by a backtick or another
 *              backslash consumes both characters, any other backslash is
 *              consumed on its own;
 *  - `%name`   identifier form; a letter (per IS_LETTER) followed by any run
 *              of IS_LETTER_OR_DIGIT characters.
 *
 * @returns an ENV_VAR token spanning the `%` and the name, or `null` when the
 *          current character is not `%` or the `%` is not followed by one of
 *          the forms above (position, line and column are then put back to
 *          where they were on entry).
 * @throws Error on an invalid escape or unicode escape inside the quoted
 *         form, and when the quoted or delimited form is not terminated.
 */
private readEnvVar(): Token | null {
  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;

  // 37 === '%'
  if (this.peekCharCode() !== 37) {
    return null;
  }

  // Builds the token from the entry point up to wherever the cursor is now.
  const envVarToken = (): Token => ({
    type: TokenType.ENV_VAR,
    start,
    end: this.position,
    line: startLine,
    column: startColumn,
  });

  this.advance(); // consume '%'
  const marker = this.peekCharCode();

  // ---- Quoted form: %'string' (39 === "'") ----
  if (marker === 39) {
    this.advance(); // opening quote

    while (this.position < this.input.length) {
      const code = this.peekCharCode();

      if (code === 39) {
        this.advance(); // closing quote
        return envVarToken();
      }

      // Anything but a backslash (92) is an ordinary character.
      if (code !== 92) {
        this.advance();
        continue;
      }

      this.advance(); // the backslash
      const escapedCode = this.peekCharCode();

      if (escapedCode === 117) {
        // \u must be followed by exactly four hex digits.
        this.advance();
        for (let remaining = 4; remaining > 0; remaining--) {
          const hexCode = this.peekCharCode();
          if (hexCode === -1 || !IS_HEX_DIGIT[hexCode]) {
            throw new Error(`Invalid unicode escape in environment variable at position ${this.position}`);
          }
          this.advance();
        }
      } else if (
        escapedCode === 39 || // '
        escapedCode === 92 || // \
        escapedCode === 47 || // /
        escapedCode === 102 || // f
        escapedCode === 110 || // n
        escapedCode === 114 || // r
        escapedCode === 116 // t
      ) {
        this.advance();
      } else {
        // -1 means the input ended right after the backslash.
        const escaped = escapedCode === -1 ? '' : String.fromCharCode(escapedCode);
        throw new Error(`Invalid escape sequence \\${escaped} in environment variable at position ${this.position}`);
      }
    }

    throw new Error(`Unterminated environment variable string at position ${start}`);
  }

  // ---- Delimited form: %`delimited` (96 === '`') ----
  if (marker === 96) {
    this.advance(); // opening backtick

    while (this.position < this.input.length) {
      const code = this.peekCharCode();

      if (code === 96) {
        this.advance(); // closing backtick
        return envVarToken();
      }

      // Every other character, backslash included, is consumed once here.
      this.advance();

      if (code === 92) {
        // A backslash additionally swallows a following backtick or backslash.
        const escapedCode = this.peekCharCode();
        if (escapedCode === 96 || escapedCode === 92) {
          this.advance();
        }
      }
    }

    throw new Error(`Unterminated environment variable delimiter at position ${start}`);
  }

  // ---- Identifier form: %identifier ----
  if (marker === -1 || !IS_LETTER[marker]) {
    // A lone percent sign: undo the '%' we consumed and let the caller decide.
    this.position = start;
    this.line = startLine;
    this.column = startColumn;
    return null;
  }

  while (this.position < this.input.length) {
    const code = this.peekCharCode();
    if (code === -1 || !IS_LETTER_OR_DIGIT[code]) {
      break;
    }
    this.advance();
  }

  return envVarToken();
}
1029
+
1030
/**
 * Produces the next token, starting at the current position.
 *
 * Leading whitespace and comments are read first. Each one is returned as a
 * token when `options.preserveTrivia` is set or the matching skip option
 * (`skipWhitespace` / `skipComments`) is off; otherwise it is dropped and
 * scanning continues. The remaining dispatch is on the char code of the first
 * character: literals and identifiers are delegated to the dedicated `read*`
 * methods, operators and punctuation are produced inline.
 *
 * @returns the next token; an EOF token once the input is exhausted.
 * @throws Error for a `!` that is not followed by `=` or `~`, and for a
 *         character that cannot start any token. The delegated readers and
 *         `throwUnexpectedChar` may raise their own errors.
 */
public nextToken(): Token {
  // Consume (or surface) trivia in front of the token.
  while (this.position < this.input.length) {
    const whitespace = this.readWhitespace();
    if (whitespace) {
      if (this.options.preserveTrivia || !this.options.skipWhitespace) return whitespace;
      continue;
    }

    const comment = this.readComment();
    if (!comment) break;
    if (this.options.preserveTrivia || !this.options.skipComments) return comment;
  }

  if (this.position >= this.input.length) {
    return { type: TokenType.EOF, start: this.position, end: this.position, line: this.line, column: this.column };
  }

  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;
  const code = this.peekCharCode();

  // One-character tokens. Two groups are kept apart because they are
  // consumed differently: `bumped` ones by incrementing position/column
  // directly, `advanced` ones through this.advance().
  let bumped: Token['type'] | undefined;
  let advanced: Token['type'] | undefined;
  switch (code) {
    case 46: bumped = TokenType.DOT; break; // .
    case 40: bumped = TokenType.LPAREN; break; // (
    case 41: bumped = TokenType.RPAREN; break; // )
    case 43: bumped = TokenType.PLUS; break; // +
    case 45: bumped = TokenType.MINUS; break; // -
    case 42: bumped = TokenType.MULTIPLY; break; // *
    case 47: bumped = TokenType.DIVIDE; break; // /
    case 38: bumped = TokenType.AMPERSAND; break; // &
    case 124: bumped = TokenType.PIPE; break; // |
    case 61: bumped = TokenType.EQ; break; // =
    case 91: advanced = TokenType.LBRACKET; break; // [
    case 93: advanced = TokenType.RBRACKET; break; // ]
    case 123: advanced = TokenType.LBRACE; break; // {
    case 125: advanced = TokenType.RBRACE; break; // }
    case 126: advanced = TokenType.SIMILAR; break; // ~
    case 44: advanced = TokenType.COMMA; break; // ,
  }

  if (bumped !== undefined) {
    this.position++;
    this.column++;
    return { type: bumped, start, end: this.position, line: startLine, column: startColumn };
  }

  if (advanced !== undefined) {
    this.advance();
    return { type: advanced, start, end: this.position, line: startLine, column: startColumn };
  }

  // Everything else needs lookahead or a dedicated reader.
  switch (code) {
    // String literals
    case 39: // '
    case 34: // "
      return this.readString() || this.throwUnexpectedChar(String.fromCharCode(code));

    // Delimited identifier
    case 96: // `
      return this.readDelimitedIdentifier() || this.throwUnexpectedChar(String.fromCharCode(code));

    // '@' opens a date/time literal; otherwise it is the AT token.
    case 64: {
      const dateTime = this.readDateTime();
      if (dateTime) return dateTime;
      this.advance();
      return { type: TokenType.AT, start, end: this.position, line: startLine, column: startColumn };
    }

    // Special identifiers ($this, $index, $total)
    case 36: // $
      return this.readSpecialIdentifier() || this.throwUnexpectedChar(String.fromCharCode(code));

    // '%' opens an environment variable; otherwise it is the PERCENT token.
    case 37: {
      const envVar = this.readEnvVar();
      if (envVar) return envVar;
      this.position++;
      this.column++;
      return { type: TokenType.PERCENT, start, end: this.position, line: startLine, column: startColumn };
    }

    // '<' or '<='
    case 60: {
      this.position++;
      this.column++;
      const orEqual = this.peekCharCode() === 61; // =
      if (orEqual) {
        this.position++;
        this.column++;
      }
      return {
        type: orEqual ? TokenType.LTE : TokenType.LT,
        start,
        end: this.position,
        line: startLine,
        column: startColumn,
      };
    }

    // '>' or '>='
    case 62: {
      this.position++;
      this.column++;
      const orEqual = this.peekCharCode() === 61; // =
      if (orEqual) {
        this.position++;
        this.column++;
      }
      return {
        type: orEqual ? TokenType.GTE : TokenType.GT,
        start,
        end: this.position,
        line: startLine,
        column: startColumn,
      };
    }

    // '!=' or '!~'; a bare '!' is not a token.
    case 33: {
      this.position++;
      this.column++;
      const follower = this.peekCharCode();
      if (follower !== 61 && follower !== 126) {
        // position was already moved past the '!', hence the -1.
        throw new Error(`Unexpected character '!' at position ${this.position - 1}`);
      }
      this.position++;
      this.column++;
      return {
        type: follower === 61 ? TokenType.NEQ : TokenType.NOT_SIMILAR,
        start,
        end: this.position,
        line: startLine,
        column: startColumn,
      };
    }

    // EOF
    case -1:
      return { type: TokenType.EOF, start: this.position, end: this.position, line: this.line, column: this.column };

    default: {
      // Number literal
      if (IS_DIGIT[code]) {
        return this.readNumber() || this.throwUnexpectedChar(String.fromCharCode(code));
      }

      // Identifier or keyword
      if (IS_LETTER[code]) {
        return this.readIdentifierOrKeyword() || this.throwUnexpectedChar(String.fromCharCode(code));
      }

      // Nothing can start with this character.
      const unknownChar = String.fromCharCode(code);
      throw new Error(`Unexpected character '${unknownChar}' at position ${this.position}`);
    }
  }
}
1207
+
1208
/**
 * Reads tokens from the current position until EOF.
 *
 * @returns every token produced by `nextToken()`, in order, with the EOF
 *          token as the last element.
 */
public tokenize(): Token[] {
  const collected: Token[] = [];
  let current: Token;

  // The EOF token is pushed too, so test the type only after pushing.
  do {
    current = this.nextToken();
    collected.push(current);
  } while (current.type !== TokenType.EOF);

  return collected;
}
1221
+
1222
/**
 * Returns the source text a token covers.
 *
 * @param token token whose `start`/`end` offsets index into this lexer's input
 * @returns the substring of the input from `token.start` up to (not including) `token.end`
 */
public getTokenValue(token: Token): string {
  const { start, end } = token;
  return this.input.substring(start, end);
}
1225
+
1226
/**
 * Debug helper: renders tokens in a human-readable form, one per line, as
 * `TYPE(text) [line:column]`.
 *
 * When no token list is supplied, the whole input is tokenized from the
 * beginning. The lexer's position, line and column are saved beforehand and
 * restored afterwards, including when tokenizing throws, so calling this
 * never leaves the lexer rewound.
 *
 * @param tokens tokens to render; when omitted the input is re-tokenized
 * @returns the rendered tokens joined with newlines
 * @throws whatever `tokenize()` throws when the input is re-tokenized
 */
public debugTokens(tokens?: Token[]): string {
  let toRender = tokens;

  if (!toRender) {
    // Save the cursor and rewind to the start of the input.
    const savedPosition = this.position;
    const savedLine = this.line;
    const savedColumn = this.column;
    this.position = 0;
    this.line = 1;
    this.column = 1;

    try {
      toRender = this.tokenize();
    } finally {
      // Restore the cursor even if tokenizing failed part-way through.
      this.position = savedPosition;
      this.line = savedLine;
      this.column = savedColumn;
    }
  }

  return toRender.map(token => {
    const value = this.getTokenValue(token);
    const type = tokenTypeToString(token.type);
    return `${type}(${value}) [${token.line}:${token.column}]`;
  }).join('\n');
}
1251
+ }