@atomic-ehr/fhirpath 0.0.1-canary.35b105d.20250724165800
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +307 -0
- package/dist/index.d.ts +225 -0
- package/dist/index.js +8185 -0
- package/dist/index.js.map +1 -0
- package/package.json +51 -0
- package/src/analyzer/analyzer.ts +486 -0
- package/src/analyzer/model-provider.ts +244 -0
- package/src/analyzer/schemas/index.ts +2 -0
- package/src/analyzer/schemas/types.ts +40 -0
- package/src/analyzer/types.ts +142 -0
- package/src/api/builder.ts +148 -0
- package/src/api/errors.ts +134 -0
- package/src/api/expression.ts +152 -0
- package/src/api/index.ts +57 -0
- package/src/api/registry.ts +128 -0
- package/src/api/types.ts +154 -0
- package/src/compiler/compiler.ts +579 -0
- package/src/compiler/index.ts +2 -0
- package/src/compiler/prototype-context-adapter.ts +99 -0
- package/src/compiler/types.ts +23 -0
- package/src/index.ts +52 -0
- package/src/interpreter/README.md +78 -0
- package/src/interpreter/interpreter.ts +485 -0
- package/src/interpreter/types.ts +110 -0
- package/src/lexer/char-tables.ts +37 -0
- package/src/lexer/errors.ts +31 -0
- package/src/lexer/index.ts +5 -0
- package/src/lexer/lexer.ts +745 -0
- package/src/lexer/token.ts +104 -0
- package/src/parser/ast.ts +123 -0
- package/src/parser/index.ts +3 -0
- package/src/parser/parser.ts +701 -0
- package/src/parser/pprint.ts +169 -0
- package/src/registry/default-analyzers.ts +257 -0
- package/src/registry/default-compilers.ts +31 -0
- package/src/registry/index.ts +93 -0
- package/src/registry/operations/arithmetic.ts +506 -0
- package/src/registry/operations/collection.ts +425 -0
- package/src/registry/operations/comparison.ts +432 -0
- package/src/registry/operations/existence.ts +703 -0
- package/src/registry/operations/filtering.ts +358 -0
- package/src/registry/operations/literals.ts +341 -0
- package/src/registry/operations/logical.ts +402 -0
- package/src/registry/operations/math.ts +128 -0
- package/src/registry/operations/membership.ts +132 -0
- package/src/registry/operations/string.ts +507 -0
- package/src/registry/operations/subsetting.ts +174 -0
- package/src/registry/operations/type-checking.ts +162 -0
- package/src/registry/operations/type-conversion.ts +404 -0
- package/src/registry/operations/type-operators.ts +307 -0
- package/src/registry/operations/utility.ts +542 -0
- package/src/registry/registry.ts +146 -0
- package/src/registry/types.ts +161 -0
- package/src/registry/utils/evaluation-helpers.ts +93 -0
- package/src/registry/utils/index.ts +3 -0
- package/src/registry/utils/type-system.ts +173 -0
- package/src/runtime/context.ts +179 -0
|
@@ -0,0 +1,745 @@
|
|
|
1
|
+
import type { Token, Position } from './token';
|
|
2
|
+
import { TokenType, Channel } from './token';
|
|
3
|
+
import { CHAR_FLAGS, FLAG_DIGIT, FLAG_IDENTIFIER_START, FLAG_IDENTIFIER_CONT, FLAG_WHITESPACE } from './char-tables';
|
|
4
|
+
import { LexerError } from './errors';
|
|
5
|
+
import { Registry } from '../registry';
|
|
6
|
+
import type { Operation, Literal } from '../registry';
|
|
7
|
+
|
|
8
|
+
// Token object pool to reduce allocations
|
|
9
|
+
/**
 * Hands out Token objects, recycling previously created ones after `reset()`.
 *
 * Tokens returned before a `reset()` are overwritten in place by later
 * `getToken()` calls, so callers must not hold on to them across a reset.
 */
class TokenPool {
  private pool: Token[] = [];
  private poolIndex: number = 0;

  /**
   * Returns a token populated with the given fields on the default channel.
   * Reuses the next pooled object when one is available, otherwise allocates
   * a new object and remembers it for later reuse.
   */
  getToken(type: TokenType, value: string, position: Position): Token {
    const slot = this.poolIndex;
    this.poolIndex = slot + 1;

    if (slot >= this.pool.length) {
      // Pool exhausted: grow it by one freshly allocated token.
      const fresh = { type, value, position, channel: Channel.DEFAULT };
      this.pool.push(fresh);
      return fresh;
    }

    // Overwrite every field of the recycled object.
    const recycled = this.pool[slot]!;
    recycled.type = type;
    recycled.value = value;
    recycled.position = position;
    recycled.channel = Channel.DEFAULT;
    return recycled;
  }

  /** Makes every pooled token available for reuse again. */
  reset() {
    this.poolIndex = 0;
  }
}
|
|
33
|
+
|
|
34
|
+
export class FHIRPathLexer {
|
|
35
|
+
private chars: string[]; // Character array for O(1) access
|
|
36
|
+
private length: number;
|
|
37
|
+
private position: number = 0;
|
|
38
|
+
private line: number = 1;
|
|
39
|
+
private column: number = 1;
|
|
40
|
+
private tokenPool = new TokenPool();
|
|
41
|
+
|
|
42
|
+
// String interning for common tokens
|
|
43
|
+
private readonly internedStrings = new Map<string, string>();
|
|
44
|
+
|
|
45
|
+
/**
 * Prepares the lexer for `input`.
 *
 * The input is split with `Array.from`, i.e. per code point, so positions and
 * offsets count code points rather than UTF-16 code units.
 */
constructor(input: string) {
  const characters = Array.from(input);
  this.chars = characters;
  this.length = characters.length;

  // Seed the intern table with frequently occurring words.
  ['true', 'false', 'and', 'or', 'where', 'select', 'exists'].forEach((word) => {
    this.internedStrings.set(word, word);
  });
}
|
|
55
|
+
|
|
56
|
+
/**
 * Scans the whole input and returns its tokens, terminated by an EOF token.
 *
 * The cursor is rewound first, so calling this method again re-scans the
 * input from the start. The token pool is intentionally NOT reset here:
 * recycling pooled tokens would overwrite the objects handed out by an
 * earlier call (the first token of the previous result would turn into EOF).
 *
 * @returns every non-trivia token followed by a single EOF token
 * @throws LexerError when the input contains an invalid token
 */
tokenize(): Token[] {
  // Rewind so that repeated calls produce the same, complete result.
  this.position = 0;
  this.line = 1;
  this.column = 1;

  const tokens: Token[] = [];

  while (!this.isAtEnd()) {
    this.skipWhitespaceAndComments();
    if (this.isAtEnd()) break;

    // nextToken() returns null for input it skipped (whitespace/comments).
    const token = this.nextToken();
    if (token) {
      tokens.push(token);
    }
  }

  tokens.push(this.tokenPool.getToken(TokenType.EOF, '', this.getCurrentPosition()));
  return tokens;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Scans one token starting at the current position.
 *
 * ASCII input is dispatched on the character code; anything else is only
 * accepted when it can start an identifier.
 *
 * @returns the scanned token, or null when the input at the cursor was
 *          whitespace or a comment that has been skipped
 * @throws LexerError for a character that cannot start any token
 */
private nextToken(): Token | null {
  const char = this.peek();
  const code = char.charCodeAt(0);

  if (code < 128) {
    switch (code) {
      // Whitespace (normally skipped by the caller already)
      case 32: case 9: case 10: case 13:
        this.advance();
        return null;

      // Single character tokens
      case 46: return this.makeTokenAndAdvance(TokenType.DOT, '.');
      case 44: return this.makeTokenAndAdvance(TokenType.COMMA, ',');
      case 40: return this.makeTokenAndAdvance(TokenType.LPAREN, '(');
      case 41: return this.makeTokenAndAdvance(TokenType.RPAREN, ')');
      case 91: return this.makeTokenAndAdvance(TokenType.LBRACKET, '[');
      case 93: return this.makeTokenAndAdvance(TokenType.RBRACKET, ']');
      case 43: return this.makeTokenAndAdvance(TokenType.PLUS, '+');
      case 45: return this.makeTokenAndAdvance(TokenType.MINUS, '-');
      case 42: return this.makeTokenAndAdvance(TokenType.STAR, '*');
      case 47: return this.scanSlashOrComment(); // '/', '//' or '/*'
      case 38: return this.makeTokenAndAdvance(TokenType.CONCAT, '&');
      case 124: return this.makeTokenAndAdvance(TokenType.PIPE, '|');
      case 61: return this.makeTokenAndAdvance(TokenType.EQ, '=');
      case 126: return this.makeTokenAndAdvance(TokenType.EQUIV, '~');

      // Multi-character operators
      case 60: return this.scanLessThan(); // < or <=
      case 62: return this.scanGreaterThan(); // > or >=
      case 33: return this.scanExclamation(); // != or !~

      // Literals, variables and braces
      case 39: return this.scanString(); // '
      case 96: return this.scanDelimitedIdentifier(); // `
      case 64: return this.scanDateTime(); // @
      case 37: return this.scanEnvironmentVariable(); // %
      case 36: return this.scanSpecialVariable(); // $
      case 123: return this.makeTokenAndAdvance(TokenType.LBRACE, '{');
      case 125: return this.makeTokenAndAdvance(TokenType.RBRACE, '}');

      default:
        // Remaining ASCII characters are classified through the lookup table.
        if ((CHAR_FLAGS[code]! & FLAG_DIGIT) !== 0) {
          return this.scanNumber();
        }
        if ((CHAR_FLAGS[code]! & FLAG_IDENTIFIER_START) !== 0) {
          return this.scanIdentifierOrKeyword();
        }
    }
  }

  // Non-ASCII input (and unclassified ASCII) ends up here.
  if (this.isIdentifierStart(char)) {
    return this.scanIdentifierOrKeyword();
  }

  throw this.error(`Unexpected character: ${char}`);
}
|
|
136
|
+
|
|
137
|
+
// Multi-character operator scanners
|
|
138
|
+
/**
 * Handles a '/' at the cursor: either the division operator or the start of
 * a line/block comment.
 *
 * @returns a SLASH token, or null when a comment was skipped instead
 */
private scanSlashOrComment(): Token | null {
  const following = this.peekNext();

  if (following !== '/' && following !== '*') {
    // Plain division operator.
    return this.makeTokenAndAdvance(TokenType.SLASH, '/');
  }

  // '//' or '/*': let the trivia skipper consume the comment.
  this.skipWhitespaceAndComments();
  return null;
}
|
|
152
|
+
|
|
153
|
+
/** Scans '<' or '<=' starting at the cursor. */
private scanLessThan(): Token {
  const origin = this.savePosition();
  this.advance(); // <

  const inclusive = this.peek() === '=';
  if (inclusive) {
    this.advance(); // =
  }

  return inclusive
    ? this.makeToken(TokenType.LTE, '<=', origin)
    : this.makeToken(TokenType.LT, '<', origin);
}
|
|
164
|
+
|
|
165
|
+
/** Scans '>' or '>=' starting at the cursor. */
private scanGreaterThan(): Token {
  const origin = this.savePosition();
  this.advance(); // >

  const inclusive = this.peek() === '=';
  if (inclusive) {
    this.advance(); // =
  }

  return inclusive
    ? this.makeToken(TokenType.GTE, '>=', origin)
    : this.makeToken(TokenType.GT, '>', origin);
}
|
|
176
|
+
|
|
177
|
+
/**
 * Scans '!=' or '!~' starting at the cursor.
 *
 * @throws LexerError when '!' is followed by anything else
 */
private scanExclamation(): Token {
  const origin = this.savePosition();
  this.advance(); // !

  switch (this.peek()) {
    case '=':
      this.advance();
      return this.makeToken(TokenType.NEQ, '!=', origin);
    case '~':
      this.advance();
      return this.makeToken(TokenType.NEQUIV, '!~', origin);
    default:
      throw this.error('Expected "=" or "~" after "!"');
  }
}
|
|
191
|
+
|
|
192
|
+
// Fast character classification using lookup table
|
|
193
|
+
/** True when `char` is an ASCII character flagged as a digit in CHAR_FLAGS. */
private isDigit(char: string): boolean {
  const code = char.charCodeAt(0);
  if (!(code < 128)) {
    return false; // only ASCII is looked up in the table
  }
  return (CHAR_FLAGS[code]! & FLAG_DIGIT) !== 0;
}
|
|
197
|
+
|
|
198
|
+
/**
 * True when `char` may begin an identifier: ASCII is answered from the
 * CHAR_FLAGS table, everything else by the Unicode category check.
 */
private isIdentifierStart(char: string): boolean {
  const code = char.charCodeAt(0);
  return code < 128
    ? (CHAR_FLAGS[code]! & FLAG_IDENTIFIER_START) !== 0
    : this.isUnicodeIdentifierStart(char);
}
|
|
205
|
+
|
|
206
|
+
/**
 * True when `char` is an ASCII character flagged as whitespace in CHAR_FLAGS.
 * Non-ASCII characters are never treated as whitespace.
 */
private isWhitespace(char: string): boolean {
  const code = char.charCodeAt(0);
  if (!(code < 128)) {
    return false;
  }
  return (CHAR_FLAGS[code]! & FLAG_WHITESPACE) !== 0;
}
|
|
210
|
+
|
|
211
|
+
// O(1) character access
|
|
212
|
+
/**
 * Returns the character `offset` places after the cursor without consuming
 * it, or '\0' when that position lies at or beyond the end of the input.
 */
private peek(offset: number = 0): string {
  const index = this.position + offset;
  if (!(index < this.length)) {
    return '\0';
  }
  return this.chars[index]!;
}
|
|
216
|
+
|
|
217
|
+
// String interning for memory efficiency
|
|
218
|
+
/**
 * Returns the canonical instance of `str` from the intern table, registering
 * it on first sight. Strings longer than 10 characters are returned as-is
 * and never stored.
 */
private intern(str: string): string {
  if (str.length > 10) {
    return str;
  }

  const cached = this.internedStrings.get(str);
  if (cached) {
    return cached;
  }

  this.internedStrings.set(str, str);
  return str;
}
|
|
226
|
+
|
|
227
|
+
// Helper Methods
|
|
228
|
+
|
|
229
|
+
// Position and Character Navigation
|
|
230
|
+
/** Captures the cursor's current line, column and offset as a new Position. */
private savePosition(): Position {
  const { line, column, position: offset } = this;
  return { line, column, offset };
}
|
|
237
|
+
|
|
238
|
+
/** Returns a new Position snapshot of the cursor (same data as savePosition). */
private getCurrentPosition(): Position {
  return {
    line: this.line,
    column: this.column,
    offset: this.position
  };
}
|
|
241
|
+
|
|
242
|
+
/**
 * Consumes the character at the cursor and returns it, keeping the
 * line/column counters in sync.
 *
 * At end of input nothing is consumed and '\0' is returned (the same
 * sentinel `peek()` uses), so the cursor can never move past the end and the
 * declared `string` return type always holds.
 */
private advance(): string {
  if (this.position >= this.length) {
    return '\0';
  }

  const char = this.chars[this.position++]!;

  // A newline starts a new line; everything else moves one column right.
  if (char === '\n') {
    this.line++;
    this.column = 1;
  } else {
    this.column++;
  }

  return char;
}
|
|
255
|
+
|
|
256
|
+
/** True once the cursor has reached (or passed) the end of the input. */
private isAtEnd(): boolean {
  const remaining = this.length - this.position;
  return remaining <= 0;
}
|
|
259
|
+
|
|
260
|
+
/** Returns the character right after the cursor, or '\0' past the end. */
private peekNext(): string {
  const lookahead = 1;
  return this.peek(lookahead);
}
|
|
263
|
+
|
|
264
|
+
// Scanning Utilities
|
|
265
|
+
/**
 * Consumes a run of ASCII digits.
 *
 * @param count when given, exactly this many digits are required and at most
 *              this many are consumed; when omitted, the whole run is taken
 * @returns the digits consumed (possibly '' when `count` is omitted), or
 *          null when fewer than `count` digits were available. On null the
 *          cursor is restored to where it was, so a failed match never
 *          swallows input that the caller then silently drops.
 */
private scanDigits(count?: number): string | null {
  const start = this.position;
  const startColumn = this.column;
  let scanned = 0;

  while (!this.isAtEnd() && this.isDigit(this.peek())) {
    this.advance();
    scanned++;
    if (count !== undefined && scanned >= count) break;
  }

  if (count !== undefined && scanned < count) {
    // Backtrack: digits never contain a newline, so only position and
    // column need to be restored.
    this.position = start;
    this.column = startColumn;
    return null;
  }

  return this.chars.slice(start, this.position).join('');
}
|
|
281
|
+
|
|
282
|
+
/**
 * Consumes identifier-continuation characters from the cursor onwards and
 * returns them ('' when the first character does not qualify). ASCII is
 * classified via CHAR_FLAGS, other characters via the Unicode category check.
 */
private scanIdentifier(): string {
  const begin = this.position;

  for (;;) {
    if (this.isAtEnd()) break;

    const ch = this.peek();
    const code = ch.charCodeAt(0);
    const continues = code < 128
      ? (CHAR_FLAGS[code]! & FLAG_IDENTIFIER_CONT) !== 0
      : this.isUnicodeIdentifierCont(ch);
    if (!continues) break;

    this.advance();
  }

  return this.chars.slice(begin, this.position).join('');
}
|
|
301
|
+
|
|
302
|
+
/**
 * Consumes characters up to, but not including, the next `target` character
 * (or the end of input) and returns the consumed text.
 */
private scanUntil(target: string): string {
  const begin = this.position;

  for (;;) {
    if (this.isAtEnd() || this.peek() === target) break;
    this.advance();
  }

  const consumed = this.chars.slice(begin, this.position);
  return consumed.join('');
}
|
|
311
|
+
|
|
312
|
+
/** Returns the input text between `start.offset` and the cursor. */
private getTextFromPosition(start: Position): string {
  const { offset } = start;
  const span = this.chars.slice(offset, this.position);
  return span.join('');
}
|
|
315
|
+
|
|
316
|
+
// Token Creation
|
|
317
|
+
/**
 * Obtains a token from the pool and assigns it to `channel`
 * (the default channel unless stated otherwise).
 */
private makeToken(
  type: TokenType,
  value: string,
  start: Position,
  channel: Channel = Channel.DEFAULT
): Token {
  // The pool always hands tokens out on the default channel; override it.
  return Object.assign(this.tokenPool.getToken(type, value, start), { channel });
}
|
|
327
|
+
|
|
328
|
+
/**
 * Creates a token positioned at the cursor and then consumes exactly one
 * character. Intended for single-character tokens.
 */
private makeTokenAndAdvance(type: TokenType, value: string): Token {
  // The token records the position before the character is consumed.
  const token = this.makeToken(type, value, this.savePosition());
  this.advance();
  return token;
}
|
|
333
|
+
|
|
334
|
+
// Complex Token Scanners
|
|
335
|
+
|
|
336
|
+
/**
 * Scans a single-quoted string literal starting at the opening quote.
 * The token value is the string's content with escape sequences resolved
 * and without the surrounding quotes.
 *
 * @throws LexerError when the closing quote is missing or an escape is invalid
 */
private scanString(): Token {
  const origin = this.savePosition();
  this.advance(); // opening '

  const pieces: string[] = [];
  while (!this.isAtEnd()) {
    const ch = this.peek();
    if (ch === "'") break;

    this.advance();
    // A backslash introduces an escape sequence; anything else is literal.
    pieces.push(ch === '\\' ? this.scanEscapeSequence() : ch);
  }

  if (this.isAtEnd()) {
    throw this.error('Unterminated string');
  }

  this.advance(); // closing '
  return this.makeToken(TokenType.STRING, pieces.join(''), origin);
}
|
|
357
|
+
|
|
358
|
+
/**
 * Resolves the escape sequence whose backslash has already been consumed.
 *
 * @returns the character the escape stands for
 * @throws LexerError when the input ends right after the backslash, or when
 *         the escape character is not one of ` ' \ / f n r t u
 */
private scanEscapeSequence(): string {
  // A trailing backslash has no escape character to read; report that
  // explicitly instead of complaining about the escape "\undefined".
  if (this.isAtEnd()) {
    throw this.error('Incomplete escape sequence: unexpected end of input');
  }

  const char = this.advance();
  switch (char) {
    case '`': return '`';
    case "'": return "'";
    case '\\': return '\\';
    case '/': return '/';
    case 'f': return '\f';
    case 'n': return '\n';
    case 'r': return '\r';
    case 't': return '\t';
    case 'u': return this.scanUnicodeEscape();
    default: throw this.error(`Invalid escape sequence: \\${char}`);
  }
}
|
|
373
|
+
|
|
374
|
+
/**
 * Reads the four hex digits of a \uXXXX escape (the "\u" is already
 * consumed) and returns the corresponding UTF-16 code unit as a string.
 *
 * @throws LexerError on the first character that is not a hex digit
 */
private scanUnicodeEscape(): string {
  let codeUnit = 0;
  let remaining = 4;

  while (remaining > 0) {
    const char = this.peek();
    const nibble = this.hexDigitValue(char);
    if (nibble === -1) {
      throw this.error(`Invalid unicode escape sequence: expected hex digit, got '${char}'`);
    }
    codeUnit = codeUnit * 16 + nibble;
    this.advance();
    remaining -= 1;
  }

  return String.fromCharCode(codeUnit);
}
|
|
388
|
+
|
|
389
|
+
/** Returns the numeric value (0-15) of a hex digit, or -1 for anything else. */
private hexDigitValue(char: string): number {
  const code = char.charCodeAt(0);

  if (code >= 97) return code <= 102 ? code - 87 : -1; // a-f
  if (code >= 65) return code <= 70 ? code - 55 : -1;  // A-F
  if (code >= 48) return code <= 57 ? code - 48 : -1;  // 0-9
  return -1;
}
|
|
396
|
+
|
|
397
|
+
// Special Variables
|
|
398
|
+
/**
 * Scans one of the special variables $this, $index or $total.
 * The token value includes the leading '$'.
 *
 * @throws LexerError for any other name following '$'
 */
private scanSpecialVariable(): Token {
  const origin = this.savePosition();
  this.advance(); // $

  const fullName = '$' + this.scanIdentifier();

  switch (fullName) {
    case '$this':
      return this.makeToken(TokenType.THIS, fullName, origin);
    case '$index':
      return this.makeToken(TokenType.INDEX, fullName, origin);
    case '$total':
      return this.makeToken(TokenType.TOTAL, fullName, origin);
    default:
      throw this.error(`Invalid special variable: ${fullName}`);
  }
}
|
|
415
|
+
|
|
416
|
+
// Date/Time Literals
|
|
417
|
+
/**
 * Scans a literal introduced by '@': a time (@T14:30), a date (@2020,
 * @2020-01, @2020-01-15) or a datetime (a date followed by 'T' and an
 * optional time), each optionally followed by a timezone ('Z' or +hh:mm /
 * -hh:mm). The token value is the literal's text including the '@'.
 *
 * A '+' or '-' directly after the literal is only taken as a timezone when
 * it is followed by the full hh:mm pattern; otherwise it is left in the
 * input so that e.g. `@2020-01-01+1` lexes as a date followed by an
 * arithmetic operator instead of failing with an "Invalid timezone" error.
 *
 * @throws LexerError when a required component (year, month after '-',
 *         day after '-', time after '@T') is malformed
 */
private scanDateTime(): Token {
  const start = this.savePosition();
  this.advance(); // consume @

  // Time-only literal: @T14:30:00
  if (this.peek() === 'T') {
    this.advance(); // consume T
    const timeFormat = this.scanTimeFormat();
    if (!timeFormat) {
      throw this.error('Invalid time format: expected time after @T');
    }
    return this.makeToken(TokenType.TIME, '@T' + timeFormat, start);
  }

  let value = '@';

  // Year is required
  const year = this.scanDigits(4);
  if (!year) throw this.error('Invalid date/time format: expected 4-digit year');
  value += year;

  // Month is optional; day is optional once a month is present
  if (this.peek() === '-') {
    value += this.advance(); // -
    const month = this.scanDigits(2);
    if (!month) throw this.error('Invalid month');
    value += month;

    if (this.peek() === '-') {
      value += this.advance(); // -
      const day = this.scanDigits(2);
      if (!day) throw this.error('Invalid day');
      value += day;
    }
  }

  // 'T' may follow any date precision; the time after it may be empty
  if (this.peek() === 'T') {
    value += this.advance(); // T
    value += this.scanTimeFormat();
  }

  // Timezone is optional
  if (this.peek() === 'Z' || this.startsTimezoneOffset()) {
    value += this.scanTimezone();
  }

  // Only date/datetime values reach this point, and 'T' can only have come
  // from the time separator.
  const tokenType = value.includes('T') ? TokenType.DATETIME : TokenType.DATE;
  return this.makeToken(tokenType, value, start);
}

/** True when the cursor is at a complete "+hh:mm" / "-hh:mm" offset. */
private startsTimezoneOffset(): boolean {
  const sign = this.peek();
  if (sign !== '+' && sign !== '-') {
    return false;
  }
  return this.isDigit(this.peek(1))
    && this.isDigit(this.peek(2))
    && this.peek(3) === ':'
    && this.isDigit(this.peek(4))
    && this.isDigit(this.peek(5));
}
|
|
487
|
+
|
|
488
|
+
/**
 * Scans a time of the form hh[:mm[:ss[.fff…]]].
 *
 * @returns the scanned text, or '' when no two-digit hour is present
 * @throws LexerError when a ':' or '.' is not followed by the digits it
 *         introduces
 */
private scanTimeFormat(): string {
  const hour = this.scanDigits(2);
  if (!hour) return ''; // callers decide whether a missing time is an error

  let time: string = hour;

  // Minutes
  if (this.peek() !== ':') return time;
  time += this.advance();
  const minute = this.scanDigits(2);
  if (!minute) throw this.error('Invalid time format: expected 2-digit minute');
  time += minute;

  // Seconds
  if (this.peek() !== ':') return time;
  time += this.advance();
  const second = this.scanDigits(2);
  if (!second) throw this.error('Invalid time format: expected 2-digit second');
  time += second;

  // Fractional seconds
  if (this.peek() !== '.') return time;
  time += this.advance();
  const fraction = this.scanDigits();
  if (!fraction) throw this.error('Invalid time format: expected fractional seconds');

  return time + fraction;
}
|
|
523
|
+
|
|
524
|
+
/**
 * Scans a timezone designator at the cursor: 'Z', or a sign followed by
 * hh:mm.
 *
 * @returns the scanned text, or '' when the cursor is not at 'Z', '+' or '-'
 * @throws LexerError when a sign is not followed by a well-formed hh:mm
 */
private scanTimezone(): string {
  const lead = this.peek();

  if (lead === 'Z') {
    this.advance();
    return 'Z';
  }

  if (lead !== '+' && lead !== '-') {
    return '';
  }

  const sign = this.advance();

  const hour = this.scanDigits(2);
  if (!hour) throw this.error('Invalid timezone: expected 2-digit hour');

  if (this.peek() !== ':') {
    throw this.error('Invalid timezone: expected ":" after hour');
  }
  const separator = this.advance();

  const minute = this.scanDigits(2);
  if (!minute) throw this.error('Invalid timezone: expected 2-digit minute');

  return sign + hour + separator + minute;
}
|
|
552
|
+
|
|
553
|
+
// Number Literals
|
|
554
|
+
/**
 * Scans an integer or decimal number. Leading zeros are accepted, and a '.'
 * is only consumed when a digit follows it.
 */
private scanNumber(): Token {
  const origin = this.savePosition();
  const consumeDigits = (): void => {
    while (this.isDigit(this.peek())) {
      this.advance();
    }
  };

  consumeDigits(); // integer part

  const hasFraction = this.peek() === '.' && this.isDigit(this.peekNext());
  if (hasFraction) {
    this.advance(); // .
    consumeDigits();
  }

  return this.makeToken(TokenType.NUMBER, this.getTextFromPosition(origin), origin);
}
|
|
573
|
+
|
|
574
|
+
// Delimited Identifiers
|
|
575
|
+
/**
 * Scans a backtick-delimited identifier starting at the opening backtick.
 * The token value is the content with escape sequences resolved and without
 * the backticks.
 *
 * @throws LexerError when the closing backtick is missing or an escape is
 *         invalid
 */
private scanDelimitedIdentifier(): Token {
  const origin = this.savePosition();
  this.advance(); // opening `

  const pieces: string[] = [];
  while (!this.isAtEnd()) {
    const ch = this.peek();
    if (ch === '`') break;

    this.advance();
    pieces.push(ch === '\\' ? this.scanEscapeSequence() : ch);
  }

  if (this.isAtEnd()) {
    throw this.error('Unterminated delimited identifier');
  }

  this.advance(); // closing `
  return this.makeToken(TokenType.DELIMITED_IDENTIFIER, pieces.join(''), origin);
}
|
|
596
|
+
|
|
597
|
+
// Environment Variables
|
|
598
|
+
/**
 * Scans an environment variable introduced by '%'. Three forms are accepted:
 * a backtick-delimited name (%`vs-name`), a string (%'string value') and a
 * plain identifier (%context). The token value is the name without the '%'
 * and without delimiters.
 *
 * @throws LexerError when a backtick-delimited name or a string form is not
 *         terminated
 */
private scanEnvironmentVariable(): Token {
  const start = this.savePosition();
  this.advance(); // consume %

  let name: string;
  if (this.peek() === '`') {
    // Delimited: %`vs-name` (taken verbatim, no escape processing)
    this.advance(); // consume `
    name = this.scanUntil('`');
    // scanUntil also stops at end of input; without this check a missing
    // closing backtick would be accepted silently.
    if (this.isAtEnd()) {
      throw this.error('Unterminated delimited environment variable');
    }
    this.advance(); // consume closing `
  } else if (this.peek() === "'") {
    // String form: %'string value'
    const stringToken = this.scanString();
    name = stringToken.value;
  } else {
    // Simple: %context
    name = this.scanIdentifier();
  }

  return this.makeToken(TokenType.ENV_VAR, name, start);
}
|
|
619
|
+
|
|
620
|
+
// Identifiers and Keywords
|
|
621
|
+
// Operator token types whose registry entry is checked, in this order, when
// an identifier is a keyword. Built once instead of on every identifier.
private static readonly KEYWORD_OPERATOR_TOKENS: readonly TokenType[] = [
  TokenType.CONTAINS, TokenType.IN, TokenType.AND, TokenType.OR,
  TokenType.XOR, TokenType.IMPLIES, TokenType.AS, TokenType.IS,
  TokenType.NOT, TokenType.MOD, TokenType.DIV
];

// Words that are emitted as UNIT tokens.
private static readonly TIME_UNITS: ReadonlySet<string> = new Set([
  'year', 'years', 'month', 'months', 'week', 'weeks',
  'day', 'days', 'hour', 'hours', 'minute', 'minutes',
  'second', 'seconds', 'millisecond', 'milliseconds'
]);

/**
 * Scans an identifier and classifies it, in this order: boolean literal
 * (TRUE/FALSE), keyword operator as reported by the Registry, time unit
 * (UNIT), or plain IDENTIFIER.
 */
private scanIdentifierOrKeyword(): Token {
  const start = this.savePosition();
  const internedValue = this.intern(this.scanIdentifier());

  // Boolean literals get dedicated token types.
  if (internedValue === 'true') {
    return this.tokenPool.getToken(TokenType.TRUE, internedValue, start);
  }
  if (internedValue === 'false') {
    return this.tokenPool.getToken(TokenType.FALSE, internedValue, start);
  }

  if (Registry.isKeyword(internedValue)) {
    // Prefer an operator registered under one of the known keyword tokens.
    for (const tokenType of FHIRPathLexer.KEYWORD_OPERATOR_TOKENS) {
      const op = Registry.getByToken(tokenType, 'infix') || Registry.getByToken(tokenType, 'prefix');
      if (op && op.name === internedValue) {
        return this.tokenPool.getToken(tokenType, internedValue, start);
      }
    }

    // Otherwise fall back to a lookup by name.
    const op = Registry.get(internedValue);
    if (op && op.kind === 'operator' && op.syntax.token) {
      return this.tokenPool.getToken(op.syntax.token, internedValue, start);
    }
  }

  if (FHIRPathLexer.TIME_UNITS.has(internedValue)) {
    return this.tokenPool.getToken(TokenType.UNIT, internedValue, start);
  }

  return this.tokenPool.getToken(TokenType.IDENTIFIER, internedValue, start);
}
|
|
667
|
+
|
|
668
|
+
// Unicode Support
|
|
669
|
+
/**
 * True when `char` contains a character from the Unicode categories
 * Letter (L*) or Letter Number (Nl).
 */
private isUnicodeIdentifierStart(char: string): boolean {
  const startPattern = /[\p{L}\p{Nl}]/u;
  return startPattern.test(char);
}
|
|
673
|
+
|
|
674
|
+
/**
 * True when `char` contains a character from the Unicode categories
 * Letter (L*), Number (N*), Mark (M*) or Connector Punctuation (Pc).
 */
private isUnicodeIdentifierCont(char: string): boolean {
  const continuePattern = /[\p{L}\p{N}\p{M}\p{Pc}]/u;
  return continuePattern.test(char);
}
|
|
678
|
+
|
|
679
|
+
// Comment and Whitespace Handling
|
|
680
|
+
/**
 * Consumes whitespace, `//` line comments and block comments at the cursor
 * until something else (or the end of input) is reached.
 *
 * A block comment that is never closed is consumed up to the end of input
 * without raising an error.
 *
 * @param preserveTrivia when true, the skipped pieces are returned as
 *        hidden-channel tokens (WS, LINE_COMMENT, COMMENT)
 * @returns the collected trivia tokens; always empty when `preserveTrivia`
 *          is false
 */
private skipWhitespaceAndComments(preserveTrivia: boolean = false): Token[] {
  const collected: Token[] = [];
  const keep = (type: TokenType, text: string, at: Position): void => {
    if (preserveTrivia) {
      collected.push(this.makeToken(type, text, at, Channel.HIDDEN));
    }
  };

  while (!this.isAtEnd()) {
    const origin = this.savePosition();
    const current = this.peek();

    if (this.isWhitespace(current)) {
      keep(TokenType.WS, this.scanWhitespace(), origin);
      continue;
    }

    if (current !== '/') break;
    const marker = this.peekNext();

    if (marker === '/') {
      // Line comment: runs up to (not including) the next newline.
      this.advance(); // /
      this.advance(); // /
      keep(TokenType.LINE_COMMENT, '//' + this.scanUntil('\n'), origin);
      continue;
    }

    if (marker === '*') {
      // Block comment: runs up to and including the closing "*/".
      this.advance(); // /
      this.advance(); // *
      let body = '/*';
      while (!this.isAtEnd() && !(this.peek() === '*' && this.peekNext() === '/')) {
        body += this.advance();
      }
      if (!this.isAtEnd()) {
        body += this.advance(); // *
        body += this.advance(); // /
      }
      keep(TokenType.COMMENT, body, origin);
      continue;
    }

    break; // a lone '/' is not trivia
  }

  return collected;
}
|
|
722
|
+
|
|
723
|
+
/** Consumes the run of whitespace at the cursor and returns it. */
private scanWhitespace(): string {
  const begin = this.position;

  for (;;) {
    if (this.isAtEnd() || !this.isWhitespace(this.peek())) break;
    this.advance();
  }

  const run = this.chars.slice(begin, this.position);
  return run.join('');
}
|
|
730
|
+
|
|
731
|
+
// Error handling
|
|
732
|
+
/**
 * Builds (does not throw) a LexerError carrying `message`, the cursor's
 * current position and the character at the cursor.
 */
private error(message: string): LexerError {
  const where = this.getCurrentPosition();
  const offending = this.peek();
  return new LexerError(message, where, offending);
}
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
// Export convenience function
|
|
742
|
+
/**
 * Convenience wrapper: tokenizes `input` with a fresh FHIRPathLexer.
 *
 * @returns the tokens produced by `FHIRPathLexer.tokenize()`
 */
export function lex(input: string): Token[] {
  return new FHIRPathLexer(input).tokenize();
}
|