@atomic-ehr/fhirpath 0.0.1-canary.35b105d.20250724165800

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/README.md +307 -0
  2. package/dist/index.d.ts +225 -0
  3. package/dist/index.js +8185 -0
  4. package/dist/index.js.map +1 -0
  5. package/package.json +51 -0
  6. package/src/analyzer/analyzer.ts +486 -0
  7. package/src/analyzer/model-provider.ts +244 -0
  8. package/src/analyzer/schemas/index.ts +2 -0
  9. package/src/analyzer/schemas/types.ts +40 -0
  10. package/src/analyzer/types.ts +142 -0
  11. package/src/api/builder.ts +148 -0
  12. package/src/api/errors.ts +134 -0
  13. package/src/api/expression.ts +152 -0
  14. package/src/api/index.ts +57 -0
  15. package/src/api/registry.ts +128 -0
  16. package/src/api/types.ts +154 -0
  17. package/src/compiler/compiler.ts +579 -0
  18. package/src/compiler/index.ts +2 -0
  19. package/src/compiler/prototype-context-adapter.ts +99 -0
  20. package/src/compiler/types.ts +23 -0
  21. package/src/index.ts +52 -0
  22. package/src/interpreter/README.md +78 -0
  23. package/src/interpreter/interpreter.ts +485 -0
  24. package/src/interpreter/types.ts +110 -0
  25. package/src/lexer/char-tables.ts +37 -0
  26. package/src/lexer/errors.ts +31 -0
  27. package/src/lexer/index.ts +5 -0
  28. package/src/lexer/lexer.ts +745 -0
  29. package/src/lexer/token.ts +104 -0
  30. package/src/parser/ast.ts +123 -0
  31. package/src/parser/index.ts +3 -0
  32. package/src/parser/parser.ts +701 -0
  33. package/src/parser/pprint.ts +169 -0
  34. package/src/registry/default-analyzers.ts +257 -0
  35. package/src/registry/default-compilers.ts +31 -0
  36. package/src/registry/index.ts +93 -0
  37. package/src/registry/operations/arithmetic.ts +506 -0
  38. package/src/registry/operations/collection.ts +425 -0
  39. package/src/registry/operations/comparison.ts +432 -0
  40. package/src/registry/operations/existence.ts +703 -0
  41. package/src/registry/operations/filtering.ts +358 -0
  42. package/src/registry/operations/literals.ts +341 -0
  43. package/src/registry/operations/logical.ts +402 -0
  44. package/src/registry/operations/math.ts +128 -0
  45. package/src/registry/operations/membership.ts +132 -0
  46. package/src/registry/operations/string.ts +507 -0
  47. package/src/registry/operations/subsetting.ts +174 -0
  48. package/src/registry/operations/type-checking.ts +162 -0
  49. package/src/registry/operations/type-conversion.ts +404 -0
  50. package/src/registry/operations/type-operators.ts +307 -0
  51. package/src/registry/operations/utility.ts +542 -0
  52. package/src/registry/registry.ts +146 -0
  53. package/src/registry/types.ts +161 -0
  54. package/src/registry/utils/evaluation-helpers.ts +93 -0
  55. package/src/registry/utils/index.ts +3 -0
  56. package/src/registry/utils/type-system.ts +173 -0
  57. package/src/runtime/context.ts +179 -0
@@ -0,0 +1,745 @@
1
+ import type { Token, Position } from './token';
2
+ import { TokenType, Channel } from './token';
3
+ import { CHAR_FLAGS, FLAG_DIGIT, FLAG_IDENTIFIER_START, FLAG_IDENTIFIER_CONT, FLAG_WHITESPACE } from './char-tables';
4
+ import { LexerError } from './errors';
5
+ import { Registry } from '../registry';
6
+ import type { Operation, Literal } from '../registry';
7
+
8
/**
 * Recycles token objects to reduce allocations.
 *
 * Every token handed out is kept in an internal list. After `reset()` the
 * same objects are handed out again, in the same order, with their fields
 * overwritten - so tokens obtained before a `reset()` must be treated as
 * invalidated once `getToken()` is called again.
 */
class TokenPool {
  private pool: Token[] = [];
  private poolIndex: number = 0;

  /**
   * Returns a token populated with the given fields on the default channel.
   *
   * @param type - Token type to assign.
   * @param value - Token text to assign.
   * @param position - Source position to assign.
   * @returns A recycled token when one is available at the current cursor,
   *   otherwise a freshly allocated one that is added to the pool.
   */
  getToken(type: TokenType, value: string, position: Position): Token {
    const canRecycle = this.poolIndex < this.pool.length;
    let slot: Token;

    if (canRecycle) {
      // Overwrite every field so nothing leaks from the previous use.
      slot = this.pool[this.poolIndex]!;
      slot.type = type;
      slot.value = value;
      slot.position = position;
      slot.channel = Channel.DEFAULT;
    } else {
      slot = { type, value, position, channel: Channel.DEFAULT };
      this.pool.push(slot);
    }

    this.poolIndex += 1;
    return slot;
  }

  /** Rewinds the cursor so subsequent `getToken()` calls reuse pooled objects. */
  reset() {
    this.poolIndex = 0;
  }
}
33
+
34
/**
 * Hand-written lexer that turns a FHIRPath expression into a flat token list.
 *
 * The input is split into an array of code points up front so that `peek()`
 * is an index lookup. ASCII characters are classified through the
 * `CHAR_FLAGS` lookup table; non-ASCII characters fall back to Unicode
 * property regexes. Token objects come from a per-lexer `TokenPool`.
 */
export class FHIRPathLexer {
  /** Calendar/time unit words that are emitted as `UNIT` tokens. */
  private static readonly TIME_UNITS: ReadonlySet<string> = new Set([
    'year', 'years', 'month', 'months', 'week', 'weeks',
    'day', 'days', 'hour', 'hours', 'minute', 'minutes',
    'second', 'seconds', 'millisecond', 'milliseconds',
  ]);

  /** Unicode categories: Letter (L*), Letter Number (Nl). */
  private static readonly UNICODE_IDENTIFIER_START = /\p{L}|\p{Nl}/u;

  /** Unicode categories: Letter (L*), Number (N*), Mark (M*), Connector Punctuation (Pc). */
  private static readonly UNICODE_IDENTIFIER_CONT = /\p{L}|\p{N}|\p{M}|\p{Pc}/u;

  private chars: string[]; // Character array for O(1) access
  private length: number;
  private position: number = 0;
  private line: number = 1;
  private column: number = 1;
  private tokenPool = new TokenPool();

  // String interning for common tokens
  private readonly internedStrings = new Map<string, string>();

  /**
   * @param input - The FHIRPath expression text to tokenize.
   */
  constructor(input: string) {
    this.chars = Array.from(input);
    this.length = this.chars.length;

    // Pre-intern common strings
    const common = ['true', 'false', 'and', 'or', 'where', 'select', 'exists'];
    for (const str of common) {
      this.internedStrings.set(str, str);
    }
  }

  /**
   * Tokenizes the whole input.
   *
   * The scan always starts from the beginning of the input. Because the
   * token pool is reset on every call, not rewinding would make a second
   * call return only EOF while overwriting the first token object of the
   * previously returned array; rewinding makes repeated calls rewrite the
   * pooled tokens with identical content instead.
   *
   * @returns All default-channel tokens followed by a single `EOF` token.
   * @throws LexerError on malformed input (see the individual scanners).
   */
  tokenize(): Token[] {
    this.position = 0;
    this.line = 1;
    this.column = 1;
    this.tokenPool.reset();

    const tokens: Token[] = [];

    while (!this.isAtEnd()) {
      this.skipWhitespaceAndComments();
      if (this.isAtEnd()) break;

      const token = this.nextToken();
      if (token) {
        tokens.push(token);
      }
    }

    tokens.push(this.tokenPool.getToken(TokenType.EOF, '', this.getCurrentPosition()));
    return tokens;
  }

  /**
   * Scans one token starting at the current position.
   *
   * @returns The token, or `null` when the consumed text (whitespace or a
   *   comment) produces no token.
   * @throws LexerError when the current character cannot start any token.
   */
  private nextToken(): Token | null {
    const char = this.peek();
    const code = char.charCodeAt(0);

    // Fast path for ASCII characters using switch
    if (code < 128) {
      switch (code) {
        // Whitespace (should have been skipped, but just in case)
        case 32: case 9: case 10: case 13:
          this.advance();
          return null;

        // Single character tokens
        case 46: return this.makeTokenAndAdvance(TokenType.DOT, '.'); // .
        case 44: return this.makeTokenAndAdvance(TokenType.COMMA, ','); // ,
        case 40: return this.makeTokenAndAdvance(TokenType.LPAREN, '('); // (
        case 41: return this.makeTokenAndAdvance(TokenType.RPAREN, ')'); // )
        case 91: return this.makeTokenAndAdvance(TokenType.LBRACKET, '['); // [
        case 93: return this.makeTokenAndAdvance(TokenType.RBRACKET, ']'); // ]
        case 43: return this.makeTokenAndAdvance(TokenType.PLUS, '+'); // +
        case 45: return this.makeTokenAndAdvance(TokenType.MINUS, '-'); // -
        case 42: return this.makeTokenAndAdvance(TokenType.STAR, '*'); // *
        case 47: return this.scanSlashOrComment(); // /
        case 38: return this.makeTokenAndAdvance(TokenType.CONCAT, '&'); // &
        case 124: return this.makeTokenAndAdvance(TokenType.PIPE, '|'); // |
        case 61: return this.makeTokenAndAdvance(TokenType.EQ, '='); // =
        case 126: return this.makeTokenAndAdvance(TokenType.EQUIV, '~'); // ~

        // Multi-character tokens
        case 60: return this.scanLessThan(); // < or <=
        case 62: return this.scanGreaterThan(); // > or >=
        case 33: return this.scanExclamation(); // != or !~

        // Complex tokens
        case 39: return this.scanString(); // '
        case 96: return this.scanDelimitedIdentifier(); // `
        case 64: return this.scanDateTime(); // @
        case 37: return this.scanEnvironmentVariable(); // %
        case 36: return this.scanSpecialVariable(); // $
        case 123: return this.makeTokenAndAdvance(TokenType.LBRACE, '{'); // {
        case 125: return this.makeTokenAndAdvance(TokenType.RBRACE, '}'); // }

        default:
          // Use lookup table for classification
          if ((CHAR_FLAGS[code]! & FLAG_DIGIT) !== 0) {
            return this.scanNumber();
          }
          if ((CHAR_FLAGS[code]! & FLAG_IDENTIFIER_START) !== 0) {
            return this.scanIdentifierOrKeyword();
          }
      }
    }

    // Fallback for non-ASCII
    if (this.isIdentifierStart(char)) {
      return this.scanIdentifierOrKeyword();
    }

    throw this.error(`Unexpected character: ${char}`);
  }

  // Multi-character operator scanners

  /**
   * Handles `/`: skips a `//` or block comment (returning `null`), otherwise
   * emits the division operator.
   */
  private scanSlashOrComment(): Token | null {
    const next = this.peekNext();
    if (next === '/' || next === '*') {
      // Single-line or multi-line comment - skip it
      this.skipWhitespaceAndComments();
      return null;
    }

    // Division operator
    return this.makeTokenAndAdvance(TokenType.SLASH, '/');
  }

  /** Scans `<` or `<=`. */
  private scanLessThan(): Token {
    const start = this.savePosition();
    this.advance(); // <

    if (this.peek() === '=') {
      this.advance(); // =
      return this.makeToken(TokenType.LTE, '<=', start);
    }

    return this.makeToken(TokenType.LT, '<', start);
  }

  /** Scans `>` or `>=`. */
  private scanGreaterThan(): Token {
    const start = this.savePosition();
    this.advance(); // >

    if (this.peek() === '=') {
      this.advance(); // =
      return this.makeToken(TokenType.GTE, '>=', start);
    }

    return this.makeToken(TokenType.GT, '>', start);
  }

  /**
   * Scans `!=` or `!~`.
   *
   * @throws LexerError when `!` is followed by anything else.
   */
  private scanExclamation(): Token {
    const start = this.savePosition();
    this.advance(); // !

    if (this.peek() === '=') {
      this.advance(); // =
      return this.makeToken(TokenType.NEQ, '!=', start);
    } else if (this.peek() === '~') {
      this.advance(); // ~
      return this.makeToken(TokenType.NEQUIV, '!~', start);
    }

    throw this.error('Expected "=" or "~" after "!"');
  }

  // Fast character classification using lookup table

  /** True when `char` is an ASCII character flagged as a digit in `CHAR_FLAGS`. */
  private isDigit(char: string): boolean {
    const code = char.charCodeAt(0);
    return code < 128 && (CHAR_FLAGS[code]! & FLAG_DIGIT) !== 0;
  }

  /** True when `char` may start an identifier (ASCII via table, otherwise Unicode). */
  private isIdentifierStart(char: string): boolean {
    const code = char.charCodeAt(0);
    if (code < 128) {
      return (CHAR_FLAGS[code]! & FLAG_IDENTIFIER_START) !== 0;
    }
    return this.isUnicodeIdentifierStart(char);
  }

  /** True when `char` is an ASCII character flagged as whitespace in `CHAR_FLAGS`. */
  private isWhitespace(char: string): boolean {
    const code = char.charCodeAt(0);
    return code < 128 && (CHAR_FLAGS[code]! & FLAG_WHITESPACE) !== 0;
  }

  /**
   * O(1) character access.
   *
   * @param offset - Number of characters ahead of the current position.
   * @returns The character there, or `'\0'` when that is past the end.
   */
  private peek(offset: number = 0): string {
    const pos = this.position + offset;
    return pos < this.length ? this.chars[pos]! : '\0';
  }

  /**
   * String interning for memory efficiency. Strings of at most 10
   * characters are cached and the cached instance is returned.
   */
  private intern(str: string): string {
    if (str.length <= 10) {
      const interned = this.internedStrings.get(str);
      if (interned) return interned;
      this.internedStrings.set(str, str);
    }
    return str;
  }

  // Helper Methods

  // Position and Character Navigation

  /** Snapshot of the current line, column and offset. */
  private savePosition(): Position {
    return {
      line: this.line,
      column: this.column,
      offset: this.position
    };
  }

  /** Alias of `savePosition()`. */
  private getCurrentPosition(): Position {
    return this.savePosition();
  }

  /**
   * Consumes and returns the current character, updating line/column.
   * Callers must ensure the lexer is not at end of input.
   */
  private advance(): string {
    const char = this.chars[this.position++]!;

    // Update line/column for newlines
    if (char === '\n') {
      this.line++;
      this.column = 1;
    } else {
      this.column++;
    }

    return char;
  }

  /** True when every character has been consumed. */
  private isAtEnd(): boolean {
    return this.position >= this.length;
  }

  /** The character after the current one, or `'\0'` past the end. */
  private peekNext(): string {
    return this.peek(1);
  }

  // Scanning Utilities

  /**
   * Consumes consecutive digits.
   *
   * @param count - When given, stop after exactly this many digits.
   * @returns The digits consumed, or `null` when `count` was given and fewer
   *   digits were available. In the `null` case the digits that were found
   *   have already been consumed.
   */
  private scanDigits(count?: number): string | null {
    const start = this.position;
    let scanned = 0;

    while (!this.isAtEnd() && this.isDigit(this.peek())) {
      this.advance();
      scanned++;
      if (count !== undefined && scanned >= count) break;
    }

    if (count !== undefined && scanned < count) {
      return null; // Didn't get required number of digits
    }

    return this.chars.slice(start, this.position).join('');
  }

  /** Consumes identifier-continuation characters and returns the text consumed. */
  private scanIdentifier(): string {
    const start = this.position;

    while (!this.isAtEnd()) {
      const char = this.peek();
      const code = char.charCodeAt(0);

      if (code < 128) {
        if ((CHAR_FLAGS[code]! & FLAG_IDENTIFIER_CONT) === 0) break;
      } else {
        // Handle non-ASCII Unicode letters/digits
        if (!this.isUnicodeIdentifierCont(char)) break;
      }

      this.advance();
    }

    return this.chars.slice(start, this.position).join('');
  }

  /**
   * Consumes characters up to (not including) `target` or end of input and
   * returns the text consumed.
   */
  private scanUntil(target: string): string {
    const start = this.position;

    while (!this.isAtEnd() && this.peek() !== target) {
      this.advance();
    }

    return this.chars.slice(start, this.position).join('');
  }

  /** Text between a saved position and the current position. */
  private getTextFromPosition(start: Position): string {
    return this.chars.slice(start.offset, this.position).join('');
  }

  // Token Creation

  /** Obtains a pooled token and assigns the requested channel. */
  private makeToken(
    type: TokenType,
    value: string,
    start: Position,
    channel: Channel = Channel.DEFAULT
  ): Token {
    const token = this.tokenPool.getToken(type, value, start);
    token.channel = channel;
    return token;
  }

  /** Emits a one-character token positioned at the current character and consumes it. */
  private makeTokenAndAdvance(type: TokenType, value: string): Token {
    const start = this.savePosition();
    this.advance();
    return this.makeToken(type, value, start);
  }

  // Complex Token Scanners

  /**
   * Scans a single-quoted string literal; the token value is the unescaped
   * content without the quotes.
   *
   * @throws LexerError on an unterminated string or invalid escape.
   */
  private scanString(): Token {
    const start = this.savePosition();
    this.advance(); // consume opening '

    let value = '';
    while (!this.isAtEnd() && this.peek() !== "'") {
      if (this.peek() === '\\') {
        this.advance();
        value += this.scanEscapeSequence();
      } else {
        value += this.advance();
      }
    }

    if (this.isAtEnd()) {
      throw this.error('Unterminated string');
    }

    this.advance(); // consume closing '
    return this.makeToken(TokenType.STRING, value, start);
  }

  /**
   * Decodes the escape that follows an already-consumed backslash.
   *
   * @throws LexerError on an unknown escape, or when the input ends right
   *   after the backslash.
   */
  private scanEscapeSequence(): string {
    // Without this guard advance() would step past the end of the input and
    // the error below would report the escape as "\undefined".
    if (this.isAtEnd()) {
      throw this.error('Invalid escape sequence: unexpected end of input');
    }

    const char = this.advance();
    switch (char) {
      case '`': return '`';
      case "'": return "'";
      case '\\': return '\\';
      case '/': return '/';
      case 'f': return '\f';
      case 'n': return '\n';
      case 'r': return '\r';
      case 't': return '\t';
      case 'u': return this.scanUnicodeEscape();
      default: throw this.error(`Invalid escape sequence: \\${char}`);
    }
  }

  /**
   * Decodes the four hex digits of a `\uXXXX` escape (the `\u` is already
   * consumed).
   *
   * @throws LexerError when fewer than four hex digits follow.
   */
  private scanUnicodeEscape(): string {
    // \uXXXX - exactly 4 hex digits
    let code = 0;
    for (let i = 0; i < 4; i++) {
      const char = this.peek();
      const digit = this.hexDigitValue(char);
      if (digit === -1) {
        throw this.error(`Invalid unicode escape sequence: expected hex digit, got '${char}'`);
      }
      code = code * 16 + digit;
      this.advance();
    }
    return String.fromCharCode(code);
  }

  /** Numeric value of a hex digit character, or -1 when it is not one. */
  private hexDigitValue(char: string): number {
    const code = char.charCodeAt(0);
    if (code >= 48 && code <= 57) return code - 48; // 0-9
    if (code >= 65 && code <= 70) return code - 65 + 10; // A-F
    if (code >= 97 && code <= 102) return code - 97 + 10; // a-f
    return -1;
  }

  // Special Variables

  /**
   * Scans `$this`, `$index` or `$total`.
   *
   * @throws LexerError for any other `$name`.
   */
  private scanSpecialVariable(): Token {
    const start = this.savePosition();
    this.advance(); // consume $

    const name = this.scanIdentifier();
    const fullName = '$' + name;

    if (name === 'this') {
      return this.makeToken(TokenType.THIS, fullName, start);
    } else if (name === 'index') {
      return this.makeToken(TokenType.INDEX, fullName, start);
    } else if (name === 'total') {
      return this.makeToken(TokenType.TOTAL, fullName, start);
    } else {
      throw this.error(`Invalid special variable: ${fullName}`);
    }
  }

  // Date/Time Literals

  /**
   * Scans an `@`-prefixed literal: `@Thh:mm...` (TIME), or a full/partial
   * date optionally followed by `T`, a time and a timezone (DATE when the
   * text contains no `T`, DATETIME otherwise).
   *
   * A trailing `+`/`-` is consumed as a timezone only when a complete
   * `±hh:mm` offset follows. Otherwise it is left in the input so that
   * expressions such as `@2020-01-01+1 day` lex as a date followed by an
   * operator instead of failing with an "Invalid timezone" error.
   *
   * @throws LexerError on a missing year, a malformed month/day/time, or a
   *   missing time after `@T`.
   */
  private scanDateTime(): Token {
    const start = this.savePosition();
    this.advance(); // consume @

    // Check for time-only literal: @T14:30:00
    if (this.peek() === 'T') {
      this.advance(); // consume T
      const timeFormat = this.scanTimeFormat();
      if (!timeFormat) {
        throw this.error('Invalid time format: expected time after @T');
      }
      return this.makeToken(TokenType.TIME, '@T' + timeFormat, start);
    }

    // Date, DateTime, or partial date literal
    let value = '@';

    // Year is required
    const year = this.scanDigits(4);
    if (!year) throw this.error('Invalid date/time format: expected 4-digit year');
    value += year;

    // Month is optional
    if (this.peek() === '-') {
      value += this.advance(); // -
      const month = this.scanDigits(2);
      if (!month) throw this.error('Invalid month');
      value += month;

      // Day is optional if month is present
      if (this.peek() === '-') {
        value += this.advance(); // -
        const day = this.scanDigits(2);
        if (!day) throw this.error('Invalid day');
        value += day;
      }
    }

    // Time component is optional after any date precision (year, month or day)
    if (this.peek() === 'T') {
      value += this.advance(); // T
      value += this.scanTimeFormat(); // may be empty
    }

    // Timezone is optional
    if (this.peek() === 'Z' || this.isTimezoneOffsetAhead()) {
      value += this.scanTimezone();
    }

    // Determine token type based on content
    const tokenType = value.includes('T') ? TokenType.DATETIME : TokenType.DATE;
    return this.makeToken(tokenType, value, start);
  }

  /**
   * Lookahead (consumes nothing): true when the upcoming characters form a
   * complete `+hh:mm` / `-hh:mm` offset.
   */
  private isTimezoneOffsetAhead(): boolean {
    const sign = this.peek();
    return (
      (sign === '+' || sign === '-') &&
      this.isDigit(this.peek(1)) &&
      this.isDigit(this.peek(2)) &&
      this.peek(3) === ':' &&
      this.isDigit(this.peek(4)) &&
      this.isDigit(this.peek(5))
    );
  }

  /**
   * Scans `hh[:mm[:ss[.fff]]]`.
   *
   * @returns The time text, or `''` when no two-digit hour is present. In
   *   that case nothing is consumed, so a lone digit stays in the input
   *   rather than being silently dropped.
   * @throws LexerError when a `:` or `.` is not followed by the required digits.
   */
  private scanTimeFormat(): string {
    // TIMEFORMAT: [0-9][0-9] (':'[0-9][0-9] (':'[0-9][0-9] ('.'[0-9]+)?)?)?
    let time = '';

    // Hour is required. scanDigits() consumes a partial match before
    // reporting failure, so remember where we were and rewind. Digits never
    // contain a newline, so only position and column need restoring.
    const hourOffset = this.position;
    const hourColumn = this.column;
    const hour = this.scanDigits(2);
    if (!hour) {
      this.position = hourOffset;
      this.column = hourColumn;
      return ''; // Empty time allowed in some contexts
    }
    time += hour;

    // Minutes optional
    if (this.peek() === ':') {
      time += this.advance(); // :
      const minute = this.scanDigits(2);
      if (!minute) throw this.error('Invalid time format: expected 2-digit minute');
      time += minute;

      // Seconds optional
      if (this.peek() === ':') {
        time += this.advance(); // :
        const second = this.scanDigits(2);
        if (!second) throw this.error('Invalid time format: expected 2-digit second');
        time += second;

        // Fractional seconds optional
        if (this.peek() === '.') {
          time += this.advance(); // .
          const fraction = this.scanDigits();
          if (!fraction) throw this.error('Invalid time format: expected fractional seconds');
          time += fraction;
        }
      }
    }

    return time;
  }

  /**
   * Scans `Z` or `±hh:mm`.
   *
   * @returns The timezone text, or `''` when the current character starts neither.
   * @throws LexerError when a sign is not followed by `hh:mm`.
   */
  private scanTimezone(): string {
    const char = this.peek();

    if (char === 'Z') {
      this.advance();
      return 'Z';
    }

    if (char === '+' || char === '-') {
      let tz = this.advance(); // + or -
      const hour = this.scanDigits(2);
      if (!hour) throw this.error('Invalid timezone: expected 2-digit hour');
      tz += hour;

      if (this.peek() !== ':') {
        throw this.error('Invalid timezone: expected ":" after hour');
      }
      tz += this.advance(); // :

      const minute = this.scanDigits(2);
      if (!minute) throw this.error('Invalid timezone: expected 2-digit minute');
      tz += minute;

      return tz;
    }

    return '';
  }

  // Number Literals

  /**
   * Scans an integer or decimal literal. A `.` is only consumed when a digit
   * follows it, so `1.foo` lexes as a number followed by `.`.
   */
  private scanNumber(): Token {
    const start = this.savePosition();

    // Allow leading zeros (e.g., 0123)
    while (this.isDigit(this.peek())) {
      this.advance();
    }

    // Check for decimal point
    if (this.peek() === '.' && this.isDigit(this.peekNext())) {
      this.advance(); // consume .
      while (this.isDigit(this.peek())) {
        this.advance();
      }
    }

    const value = this.getTextFromPosition(start);
    return this.makeToken(TokenType.NUMBER, value, start);
  }

  // Delimited Identifiers

  /**
   * Scans a backtick-delimited identifier; the token value is the unescaped
   * content without the backticks.
   *
   * @throws LexerError on an unterminated identifier or invalid escape.
   */
  private scanDelimitedIdentifier(): Token {
    const start = this.savePosition();
    this.advance(); // consume opening `

    let value = '';
    while (!this.isAtEnd() && this.peek() !== '`') {
      if (this.peek() === '\\') {
        this.advance();
        value += this.scanEscapeSequence();
      } else {
        value += this.advance();
      }
    }

    if (this.isAtEnd()) {
      throw this.error('Unterminated delimited identifier');
    }

    this.advance(); // consume closing `
    return this.makeToken(TokenType.DELIMITED_IDENTIFIER, value, start);
  }

  // Environment Variables

  /**
   * Scans `%name`, `` %`name` `` or `%'name'`; the token value is the bare name.
   *
   * @throws LexerError when the backtick or quote form is not terminated.
   */
  private scanEnvironmentVariable(): Token {
    const start = this.savePosition();
    this.advance(); // consume %

    let name: string;
    if (this.peek() === '`') {
      // Delimited: %`vs-name`
      this.advance(); // consume `
      name = this.scanUntil('`');
      // scanUntil() also stops at end of input; without this check an
      // unterminated name would be accepted and advance() would run past
      // the end of the input.
      if (this.isAtEnd()) {
        throw this.error('Unterminated delimited identifier');
      }
      this.advance(); // consume closing `
    } else if (this.peek() === "'") {
      // String form: %'string value'
      const stringToken = this.scanString();
      name = stringToken.value;
    } else {
      // Simple: %context
      name = this.scanIdentifier();
    }

    return this.makeToken(TokenType.ENV_VAR, name, start);
  }

  // Identifiers and Keywords

  /**
   * Scans an identifier and classifies it as a boolean literal, a keyword
   * operator known to the registry, a time unit, or a plain identifier.
   */
  private scanIdentifierOrKeyword(): Token {
    const start = this.savePosition();
    const value = this.scanIdentifier();

    // Intern the string for efficient comparison
    const internedValue = this.intern(value);

    // Special handling for boolean literals (to maintain compatibility)
    if (internedValue === 'true') {
      return this.tokenPool.getToken(TokenType.TRUE, internedValue, start);
    }
    if (internedValue === 'false') {
      return this.tokenPool.getToken(TokenType.FALSE, internedValue, start);
    }

    // Check if it's a keyword using registry
    if (Registry.isKeyword(internedValue)) {
      // First try to find an operator with this name
      const operators = [TokenType.CONTAINS, TokenType.IN, TokenType.AND, TokenType.OR,
        TokenType.XOR, TokenType.IMPLIES, TokenType.AS, TokenType.IS,
        TokenType.NOT, TokenType.MOD, TokenType.DIV];

      for (const tokenType of operators) {
        const op = Registry.getByToken(tokenType, 'infix') || Registry.getByToken(tokenType, 'prefix');
        if (op && op.name === internedValue) {
          return this.tokenPool.getToken(tokenType, internedValue, start);
        }
      }

      // Fallback to general lookup
      const op = Registry.get(internedValue);
      if (op && op.kind === 'operator' && op.syntax.token) {
        return this.tokenPool.getToken(op.syntax.token, internedValue, start);
      }
    }

    // Check for time units (these are handled as UNIT tokens)
    if (FHIRPathLexer.TIME_UNITS.has(internedValue)) {
      return this.tokenPool.getToken(TokenType.UNIT, internedValue, start);
    }

    return this.tokenPool.getToken(TokenType.IDENTIFIER, internedValue, start);
  }

  // Unicode Support

  /** True when a non-ASCII character may start an identifier. */
  private isUnicodeIdentifierStart(char: string): boolean {
    return FHIRPathLexer.UNICODE_IDENTIFIER_START.test(char);
  }

  /** True when a non-ASCII character may continue an identifier. */
  private isUnicodeIdentifierCont(char: string): boolean {
    return FHIRPathLexer.UNICODE_IDENTIFIER_CONT.test(char);
  }

  // Comment and Whitespace Handling

  /**
   * Consumes any run of whitespace, `//` line comments and block comments.
   * A block comment that is never closed is consumed to end of input
   * without an error.
   *
   * @param preserveTrivia - When true, a hidden-channel token is produced
   *   for every run of whitespace and every comment.
   * @returns The trivia tokens (empty unless `preserveTrivia` is true).
   */
  private skipWhitespaceAndComments(preserveTrivia: boolean = false): Token[] {
    const trivia: Token[] = [];

    while (!this.isAtEnd()) {
      const start = this.savePosition();
      const char = this.peek();

      if (this.isWhitespace(char)) {
        const ws = this.scanWhitespace();
        if (preserveTrivia) {
          trivia.push(this.makeToken(TokenType.WS, ws, start, Channel.HIDDEN));
        }
      } else if (char === '/' && this.peekNext() === '/') {
        // Single-line comment
        this.advance(); // /
        this.advance(); // /
        const comment = '//' + this.scanUntil('\n');
        if (preserveTrivia) {
          trivia.push(this.makeToken(TokenType.LINE_COMMENT, comment, start, Channel.HIDDEN));
        }
      } else if (char === '/' && this.peekNext() === '*') {
        // Multi-line comment
        this.advance(); // /
        this.advance(); // *
        let comment = '/*';
        while (!this.isAtEnd() && !(this.peek() === '*' && this.peekNext() === '/')) {
          comment += this.advance();
        }
        if (!this.isAtEnd()) {
          comment += this.advance(); // *
          comment += this.advance(); // /
        }
        if (preserveTrivia) {
          trivia.push(this.makeToken(TokenType.COMMENT, comment, start, Channel.HIDDEN));
        }
      } else {
        break;
      }
    }

    return trivia;
  }

  /** Consumes a run of whitespace and returns it. */
  private scanWhitespace(): string {
    const start = this.position;
    while (!this.isAtEnd() && this.isWhitespace(this.peek())) {
      this.advance();
    }
    return this.chars.slice(start, this.position).join('');
  }

  // Error handling

  /** Builds a `LexerError` carrying the current position and current character. */
  private error(message: string): LexerError {
    return new LexerError(
      message,
      this.getCurrentPosition(),
      this.peek()
    );
  }
}
740
+
741
/**
 * Convenience wrapper: tokenizes `input` with a fresh `FHIRPathLexer`.
 *
 * @param input - The FHIRPath expression text.
 * @returns The tokens produced by `FHIRPathLexer.tokenize()`.
 */
export function lex(input: string): Token[] {
  return new FHIRPathLexer(input).tokenize();
}