@atomic-ehr/fhirpath 0.0.1-canary.0c6931e.20250727185306
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +473 -0
- package/dist/index.d.ts +462 -0
- package/dist/index.js +10307 -0
- package/dist/index.js.map +1 -0
- package/package.json +58 -0
- package/src/analyzer/analyzer.ts +499 -0
- package/src/analyzer/model-provider.ts +244 -0
- package/src/analyzer/schemas/index.ts +2 -0
- package/src/analyzer/schemas/types.ts +40 -0
- package/src/analyzer/types.ts +142 -0
- package/src/api/builder.ts +157 -0
- package/src/api/errors.ts +145 -0
- package/src/api/expression.ts +156 -0
- package/src/api/index.ts +122 -0
- package/src/api/inspect.ts +99 -0
- package/src/api/registry.ts +128 -0
- package/src/api/types.ts +210 -0
- package/src/compiler/compiler.ts +546 -0
- package/src/compiler/index.ts +2 -0
- package/src/compiler/prototype-context-adapter.ts +99 -0
- package/src/compiler/types.ts +24 -0
- package/src/index.ts +107 -0
- package/src/interpreter/README.md +78 -0
- package/src/interpreter/interpreter.ts +475 -0
- package/src/interpreter/types.ts +108 -0
- package/src/lexer/char-tables.ts +37 -0
- package/src/lexer/errors.ts +31 -0
- package/src/lexer/index.ts +5 -0
- package/src/lexer/lexer.ts +745 -0
- package/src/lexer/token.ts +104 -0
- package/src/lexer2/index.md +232 -0
- package/src/lexer2/index.perf.test.ts +68 -0
- package/src/lexer2/index.test.ts +549 -0
- package/src/lexer2/index.ts +1251 -0
- package/src/lexer2/notes.md +173 -0
- package/src/lexer2/optimization-summary.md +718 -0
- package/src/parser/ast-factory.ts +220 -0
- package/src/parser/ast.ts +144 -0
- package/src/parser/collection-parser.ts +89 -0
- package/src/parser/diagnostic-messages.ts +216 -0
- package/src/parser/diagnostics.ts +85 -0
- package/src/parser/error-reporter.ts +230 -0
- package/src/parser/index.ts +3 -0
- package/src/parser/literal-parser.ts +103 -0
- package/src/parser/parse-error.ts +16 -0
- package/src/parser/parser-error-factory.ts +141 -0
- package/src/parser/parser-state.ts +134 -0
- package/src/parser/parser.ts +1272 -0
- package/src/parser/pprint.ts +169 -0
- package/src/parser/precedence-manager.ts +64 -0
- package/src/parser/source-mapper.ts +248 -0
- package/src/parser/special-constructs.ts +142 -0
- package/src/parser/token-navigator.ts +110 -0
- package/src/parser/types.ts +60 -0
- package/src/parser2/index.md +177 -0
- package/src/parser2/index.perf.test.ts +184 -0
- package/src/parser2/index.test.ts +305 -0
- package/src/parser2/index.ts +578 -0
- package/src/parser2/optimization-summary.md +176 -0
- package/src/registry/default-analyzers.ts +257 -0
- package/src/registry/default-compilers.ts +31 -0
- package/src/registry/index.ts +96 -0
- package/src/registry/operations/arithmetic.ts +506 -0
- package/src/registry/operations/collection.ts +425 -0
- package/src/registry/operations/comparison.ts +432 -0
- package/src/registry/operations/existence.ts +703 -0
- package/src/registry/operations/filtering.ts +358 -0
- package/src/registry/operations/literals.ts +341 -0
- package/src/registry/operations/logical.ts +439 -0
- package/src/registry/operations/math.ts +128 -0
- package/src/registry/operations/membership.ts +132 -0
- package/src/registry/operations/navigation.ts +52 -0
- package/src/registry/operations/string.ts +507 -0
- package/src/registry/operations/subsetting.ts +174 -0
- package/src/registry/operations/type-checking.ts +162 -0
- package/src/registry/operations/type-conversion.ts +404 -0
- package/src/registry/operations/type-operators.ts +308 -0
- package/src/registry/operations/utility.ts +644 -0
- package/src/registry/registry.ts +146 -0
- package/src/registry/types.ts +161 -0
- package/src/registry/utils/evaluation-helpers.ts +93 -0
- package/src/registry/utils/index.ts +3 -0
- package/src/registry/utils/type-system.ts +173 -0
- package/src/runtime/context.ts +158 -0
- package/src/runtime/debug-context.ts +135 -0
|
@@ -0,0 +1,549 @@
|
|
|
1
|
+
import { describe, it, expect } from 'bun:test';
|
|
2
|
+
import { Lexer, TokenType, Channel } from './index';
|
|
3
|
+
|
|
4
|
+
describe('Lexer', () => {
|
|
5
|
+
/**
 * Lexes `expression` with the lexer's default options and returns only the
 * type of each emitted token, in emission order.
 */
function getTokenTypes(expression: string): TokenType[] {
  return new Lexer(expression).tokenize().map(({ type }) => type);
}
|
|
10
|
+
|
|
11
|
+
describe('literals', () => {
  // The input this test treats as the "null literal" is the empty brace pair,
  // which lexes as two separate brace tokens.
  it('tokenizes null literal', () => {
    const types = getTokenTypes('{}');
    expect(types).toEqual([TokenType.LBRACE, TokenType.RBRACE, TokenType.EOF]);
  });

  it('tokenizes boolean literals', () => {
    const types = getTokenTypes('true false');
    expect(types).toEqual([TokenType.TRUE, TokenType.FALSE, TokenType.EOF]);
  });

  // Every source below must lex to exactly one STRING token followed by EOF.
  it('tokenizes string literals', () => {
    const sources = [
      // Single-quoted strings
      "'hello world'",
      "'hello\\nworld\\t\\r\\\\'",
      "'\\u0048\\u0065\\u006C\\u006C\\u006F'",

      // Double-quoted strings
      '"hello world"',
      '"hello\\nworld\\t\\r\\\\\\""',
      '"\\u0048\\u0065\\u006C\\u006C\\u006F"',

      // Mixed quotes
      `"single ' quote inside"`,
      `'double " quote inside'`,
    ];

    for (const source of sources) {
      expect(getTokenTypes(source)).toEqual([TokenType.STRING, TokenType.EOF]);
    }
  });

  it('tokenizes numbers', () => {
    const types = getTokenTypes('42 3.14 0.5 123.456');
    expect(types).toEqual([
      TokenType.NUMBER, TokenType.NUMBER, TokenType.NUMBER, TokenType.NUMBER,
      TokenType.EOF,
    ]);
  });

  // Year, year-month, full date and full timestamp precision are all DATETIME.
  it('tokenizes datetime literals', () => {
    const types = getTokenTypes('@2023 @2023-12 @2023-12-25 @2023-12-25T10:30:45.123Z');
    expect(types).toEqual([
      TokenType.DATETIME, TokenType.DATETIME, TokenType.DATETIME, TokenType.DATETIME,
      TokenType.EOF,
    ]);
  });

  // Minute, second and millisecond precision are all TIME.
  it('tokenizes time literals', () => {
    const types = getTokenTypes('@T10:30 @T10:30:45 @T10:30:45.123');
    expect(types).toEqual([
      TokenType.TIME, TokenType.TIME, TokenType.TIME,
      TokenType.EOF,
    ]);
  });
});
|
|
102
|
+
|
|
103
|
+
describe('identifiers', () => {
  it('tokenizes simple identifiers', () => {
    const types = getTokenTypes('foo bar_baz _test Test123');
    expect(types).toEqual([
      TokenType.IDENTIFIER, TokenType.IDENTIFIER, TokenType.IDENTIFIER, TokenType.IDENTIFIER,
      TokenType.EOF,
    ]);
  });

  it('tokenizes delimited identifiers with Unicode', () => {
    // Unicode must be in delimited identifiers per spec
    const accentedAndGreek = getTokenTypes('`café` `münchen` `Σ` `λ`');
    expect(accentedAndGreek).toEqual([
      TokenType.DELIMITED_IDENTIFIER, TokenType.DELIMITED_IDENTIFIER,
      TokenType.DELIMITED_IDENTIFIER, TokenType.DELIMITED_IDENTIFIER,
      TokenType.EOF,
    ]);

    // Mixed ASCII and Unicode in delimited identifiers
    const mixed = getTokenTypes('`test_café` `value_π` `x²`');
    expect(mixed).toEqual([
      TokenType.DELIMITED_IDENTIFIER, TokenType.DELIMITED_IDENTIFIER,
      TokenType.DELIMITED_IDENTIFIER,
      TokenType.EOF,
    ]);

    // Various Unicode categories in delimited identifiers
    const scripts = getTokenTypes('`日本語` `中文` `한글` `العربية`');
    expect(scripts).toEqual([
      TokenType.DELIMITED_IDENTIFIER, TokenType.DELIMITED_IDENTIFIER,
      TokenType.DELIMITED_IDENTIFIER, TokenType.DELIMITED_IDENTIFIER,
      TokenType.EOF,
    ]);
  });

  // Covers an embedded space and an escaped backtick inside the delimiters.
  it('tokenizes delimited identifiers', () => {
    const types = getTokenTypes('`foo bar` `with\\`backtick`');
    expect(types).toEqual([
      TokenType.DELIMITED_IDENTIFIER, TokenType.DELIMITED_IDENTIFIER,
      TokenType.EOF,
    ]);
  });

  it('tokenizes special identifiers', () => {
    const types = getTokenTypes('$this $index $total');
    expect(types).toEqual([TokenType.THIS, TokenType.INDEX, TokenType.TOTAL, TokenType.EOF]);
  });

  it('tokenizes environment variables', () => {
    // Most cases below consist of exactly three environment variables.
    const threeEnvVars = [
      TokenType.ENV_VAR, TokenType.ENV_VAR, TokenType.ENV_VAR,
      TokenType.EOF,
    ];

    // Identifier form
    expect(getTokenTypes('%context %sct %vs')).toEqual(threeEnvVars);

    // String form
    expect(
      getTokenTypes(`%'simple string' %'with\\nescapes' %'unicode\\u0048'`)
    ).toEqual(threeEnvVars);

    // Delimited form
    expect(
      getTokenTypes('%`any string name` %`with\\`backtick` %`complex-name_123`')
    ).toEqual(threeEnvVars);

    // Mixed with percent operator
    expect(getTokenTypes('value % 10 %context')).toEqual([
      TokenType.IDENTIFIER, TokenType.PERCENT, TokenType.NUMBER, TokenType.ENV_VAR,
      TokenType.EOF,
    ]);

    // Unicode in delimited form
    expect(getTokenTypes('%`café` %`münchen` %`日本語`')).toEqual(threeEnvVars);
  });
});
|
|
202
|
+
|
|
203
|
+
describe('keywords', () => {
  // Each word operator has its own dedicated token type.
  it('tokenizes keywords', () => {
    const source = 'as contains in is div mod and or xor implies';
    expect(getTokenTypes(source)).toEqual([
      TokenType.AS, TokenType.CONTAINS, TokenType.IN, TokenType.IS,
      TokenType.DIV, TokenType.MOD,
      TokenType.AND, TokenType.OR, TokenType.XOR, TokenType.IMPLIES,
      TokenType.EOF,
    ]);
  });

  // Singular calendar/time unit words.
  it('tokenizes time units', () => {
    const source = 'year month week day hour minute second millisecond';
    expect(getTokenTypes(source)).toEqual([
      TokenType.YEAR, TokenType.MONTH, TokenType.WEEK, TokenType.DAY,
      TokenType.HOUR, TokenType.MINUTE, TokenType.SECOND, TokenType.MILLISECOND,
      TokenType.EOF,
    ]);
  });

  // Plural unit words get token types distinct from their singular forms.
  it('tokenizes plural time units', () => {
    const source = 'years months weeks days hours minutes seconds milliseconds';
    expect(getTokenTypes(source)).toEqual([
      TokenType.YEARS, TokenType.MONTHS, TokenType.WEEKS, TokenType.DAYS,
      TokenType.HOURS, TokenType.MINUTES, TokenType.SECONDS, TokenType.MILLISECONDS,
      TokenType.EOF,
    ]);
  });
});
|
|
248
|
+
|
|
249
|
+
describe('operators', () => {
  it('tokenizes single-character operators', () => {
    const source = '. ( ) [ ] { } + - * / & | < > = ~ , % @';
    expect(getTokenTypes(source)).toEqual([
      // . ( ) [ ] { }
      TokenType.DOT, TokenType.LPAREN, TokenType.RPAREN,
      TokenType.LBRACKET, TokenType.RBRACKET,
      TokenType.LBRACE, TokenType.RBRACE,
      // + - * /
      TokenType.PLUS, TokenType.MINUS, TokenType.MULTIPLY, TokenType.DIVIDE,
      // & |
      TokenType.AMPERSAND, TokenType.PIPE,
      // < > = ~
      TokenType.LT, TokenType.GT, TokenType.EQ, TokenType.SIMILAR,
      // , % @  (a lone % or @ is its own token here)
      TokenType.COMMA, TokenType.PERCENT, TokenType.AT,
      TokenType.EOF,
    ]);
  });

  it('tokenizes two-character operators', () => {
    const source = '<= >= != !~';
    expect(getTokenTypes(source)).toEqual([
      TokenType.LTE, TokenType.GTE, TokenType.NEQ, TokenType.NOT_SIMILAR,
      TokenType.EOF,
    ]);
  });
});
|
|
286
|
+
|
|
287
|
+
describe('whitespace and comments', () => {
  // Space, tab and CRLF separators must all vanish under the default options.
  it('skips whitespace by default', () => {
    const types = getTokenTypes('a b\t\tc\r\nd');
    expect(types).toEqual([
      TokenType.IDENTIFIER, TokenType.IDENTIFIER, TokenType.IDENTIFIER, TokenType.IDENTIFIER,
      TokenType.EOF,
    ]);
  });

  it('includes whitespace when configured', () => {
    const tokens = new Lexer('a b', { skipWhitespace: false }).tokenize();
    const types = tokens.map(({ type }) => type);
    expect(types).toEqual([
      TokenType.IDENTIFIER,
      TokenType.WHITESPACE,
      TokenType.IDENTIFIER,
      TokenType.EOF,
    ]);
  });

  // Both block comments and line comments are dropped under the default options.
  it('skips comments by default', () => {
    const types = getTokenTypes('a /* comment */ b // line comment\nc');
    expect(types).toEqual([
      TokenType.IDENTIFIER, TokenType.IDENTIFIER, TokenType.IDENTIFIER,
      TokenType.EOF,
    ]);
  });

  // With both skip flags off, the whitespace around the comment is emitted too.
  it('includes comments when configured', () => {
    const options = { skipComments: false, skipWhitespace: false };
    const tokens = new Lexer('a /* comment */ b', options).tokenize();
    const types = tokens.map(({ type }) => type);
    expect(types).toEqual([
      TokenType.IDENTIFIER,
      TokenType.WHITESPACE,
      TokenType.COMMENT,
      TokenType.WHITESPACE,
      TokenType.IDENTIFIER,
      TokenType.EOF,
    ]);
  });
});
|
|
331
|
+
|
|
332
|
+
describe('complex expressions', () => {
  it('tokenizes property access', () => {
    expect(getTokenTypes('Patient.name.given')).toEqual([
      TokenType.IDENTIFIER,
      TokenType.DOT,
      TokenType.IDENTIFIER,
      TokenType.DOT,
      TokenType.IDENTIFIER,
      TokenType.EOF
    ]);
  });

  // Function names such as `where` are plain identifiers at the lexer level.
  it('tokenizes function calls', () => {
    expect(getTokenTypes('where(active = true)')).toEqual([
      TokenType.IDENTIFIER,
      TokenType.LPAREN,
      TokenType.IDENTIFIER,
      TokenType.EQ,
      TokenType.TRUE,
      TokenType.RPAREN,
      TokenType.EOF
    ]);
  });

  it('tokenizes arithmetic expressions', () => {
    expect(getTokenTypes('5 + 3 * 2 - 1')).toEqual([
      TokenType.NUMBER,
      TokenType.PLUS,
      TokenType.NUMBER,
      TokenType.MULTIPLY,
      TokenType.NUMBER,
      TokenType.MINUS,
      TokenType.NUMBER,
      TokenType.EOF
    ]);
  });

  // A quantity is not a single token: the number and its unit (a calendar
  // keyword or a quoted string) are emitted separately.
  it('tokenizes quantity with units', () => {
    expect(getTokenTypes("5 years 3.5 'mg'")).toEqual([
      TokenType.NUMBER,
      TokenType.YEARS,
      TokenType.NUMBER,
      TokenType.STRING,
      TokenType.EOF
    ]);
  });

  // Previously this test only printed the tokens and asserted nothing, so it
  // could never fail and added noise to the test output. It now pins the
  // token sequence of a typical navigation-plus-filter expression.
  it("human readable", () => {
    const lexer = new Lexer("Patient.name.where(given = 'John')");
    const tokens = lexer.tokenize();

    expect(tokens.map(t => t.type)).toEqual([
      TokenType.IDENTIFIER, // Patient
      TokenType.DOT,
      TokenType.IDENTIFIER, // name
      TokenType.DOT,
      TokenType.IDENTIFIER, // where
      TokenType.LPAREN,
      TokenType.IDENTIFIER, // given
      TokenType.EQ,
      TokenType.STRING,     // 'John'
      TokenType.RPAREN,
      TokenType.EOF
    ]);
  });

  it('tokenizes expressions with double-quoted strings', () => {
    expect(getTokenTypes(`"Hello" + " " + "World"`)).toEqual([
      TokenType.STRING,
      TokenType.PLUS,
      TokenType.STRING,
      TokenType.PLUS,
      TokenType.STRING,
      TokenType.EOF
    ]);

    expect(getTokenTypes(`name.where(use = "official")`)).toEqual([
      TokenType.IDENTIFIER,
      TokenType.DOT,
      TokenType.IDENTIFIER,
      TokenType.LPAREN,
      TokenType.IDENTIFIER,
      TokenType.EQ,
      TokenType.STRING,
      TokenType.RPAREN,
      TokenType.EOF
    ]);
  });
});
|
|
408
|
+
|
|
409
|
+
describe('error handling', () => {
  // Builds the thunk that `expect(...).toThrow` needs: lexing happens only
  // when the matcher invokes it.
  const lexing = (source: string) => () => new Lexer(source).tokenize();

  it('throws on unexpected character', () => {
    expect(lexing('a $ b')).toThrow('Unexpected character');
  });

  it('throws on unterminated string', () => {
    expect(lexing("'unterminated")).toThrow('Unterminated string');
    expect(lexing('"unterminated')).toThrow('Unterminated string');
  });

  it('throws on invalid escape sequence', () => {
    expect(lexing("'\\q'")).toThrow('Invalid escape sequence');
    expect(lexing('"\\q"')).toThrow('Invalid escape sequence');
  });

  it('throws on invalid unicode escape', () => {
    expect(lexing("'\\uXYZ'")).toThrow('Invalid unicode escape');
    expect(lexing('"\\uXYZ"')).toThrow('Invalid unicode escape');
  });

  // The quoted and the backtick-delimited forms report different messages.
  it('throws on unterminated environment variables', () => {
    expect(lexing("%'unterminated")).toThrow('Unterminated environment variable string');
    expect(lexing("%`unterminated")).toThrow('Unterminated environment variable delimiter');
  });

  it('throws on invalid escape in environment variables', () => {
    expect(lexing("%'\\q'")).toThrow('Invalid escape sequence');
    expect(lexing("%'\\uXYZ'")).toThrow('Invalid unicode escape');
  });

  it('throws on Unicode in regular identifiers', () => {
    // Unicode is not allowed in regular identifiers per spec
    for (const source of ['café', 'münchen', '日本語']) {
      expect(lexing(source)).toThrow('Unexpected character');
    }
  });
});
|
|
446
|
+
|
|
447
|
+
describe('trivia and channels', () => {
  it('preserves whitespace and comments with channel information', () => {
    const lexer = new Lexer('a /* comment */ b', { preserveTrivia: true });
    const tokens = lexer.tokenize();

    expect(tokens.length).toBe(6); // a, whitespace, comment, whitespace, b, EOF
    expect(tokens[0]!.type).toBe(TokenType.IDENTIFIER);
    expect(tokens[0]!.channel).toBeUndefined(); // Regular tokens don't have channel

    expect(tokens[1]!.type).toBe(TokenType.WHITESPACE);
    expect(tokens[1]!.channel).toBe(Channel.HIDDEN);

    expect(tokens[2]!.type).toBe(TokenType.COMMENT);
    expect(tokens[2]!.channel).toBe(Channel.HIDDEN);

    expect(tokens[3]!.type).toBe(TokenType.WHITESPACE);
    expect(tokens[3]!.channel).toBe(Channel.HIDDEN);

    expect(tokens[4]!.type).toBe(TokenType.IDENTIFIER);
    expect(tokens[4]!.channel).toBeUndefined();
  });

  it('preserves line comments with channel information', () => {
    const lexer = new Lexer('a // comment\nb', { preserveTrivia: true });
    const tokens = lexer.tokenize();

    expect(tokens[0]!.type).toBe(TokenType.IDENTIFIER);
    expect(tokens[1]!.type).toBe(TokenType.WHITESPACE);
    expect(tokens[1]!.channel).toBe(Channel.HIDDEN);
    expect(tokens[2]!.type).toBe(TokenType.LINE_COMMENT);
    expect(tokens[2]!.channel).toBe(Channel.HIDDEN);
    expect(tokens[3]!.type).toBe(TokenType.WHITESPACE); // newline
    expect(tokens[3]!.channel).toBe(Channel.HIDDEN);
    expect(tokens[4]!.type).toBe(TokenType.IDENTIFIER);
  });

  it('can filter tokens by channel', () => {
    const lexer = new Lexer('Patient . name // comment', { preserveTrivia: true });
    const allTokens = lexer.tokenize();

    // Filter regular tokens
    const regularTokens = allTokens.filter(t => t.channel !== Channel.HIDDEN);
    expect(regularTokens.map(t => t.type)).toEqual([
      TokenType.IDENTIFIER,
      TokenType.DOT,
      TokenType.IDENTIFIER,
      TokenType.EOF
    ]);

    // Filter hidden tokens
    const hiddenTokens = allTokens.filter(t => t.channel === Channel.HIDDEN);
    expect(hiddenTokens.map(t => t.type)).toEqual([
      TokenType.WHITESPACE,
      TokenType.WHITESPACE,
      TokenType.WHITESPACE,
      TokenType.LINE_COMMENT
    ]);
  });

  it('preserveTrivia overrides skipWhitespace and skipComments', () => {
    const lexer = new Lexer('a /* c */ b', {
      preserveTrivia: true,
      skipWhitespace: true,  // Should be overridden
      skipComments: true     // Should be overridden
    });
    const tokens = lexer.tokenize();

    // Should include whitespace and comments despite skip flags
    expect(tokens.map(t => t.type)).toEqual([
      TokenType.IDENTIFIER,
      TokenType.WHITESPACE,
      TokenType.COMMENT,
      TokenType.WHITESPACE,
      TokenType.IDENTIFIER,
      TokenType.EOF
    ]);
  });

  // Timing comparison between the default lexer and one run with
  // `preserveTrivia: true`. A single cold measurement of each variant is
  // unreliable: the first loop pays for JIT warm-up and any scheduler or GC
  // pause skews the ratio. To keep the 10% assertion meaningful, both paths
  // are warmed up first, then measured in interleaved rounds, and the
  // fastest round of each variant is compared.
  it('performance is minimally impacted by channel assignment', () => {
    const expression = 'Patient.name.given.where(use = "official")';
    const iterations = 10000;
    const rounds = 5;

    const lexPlain = (): void => {
      new Lexer(expression).tokenize();
    };
    const lexWithTrivia = (): void => {
      new Lexer(expression, { preserveTrivia: true }).tokenize();
    };

    // Wall-clock milliseconds for `iterations` calls of `run`.
    const measure = (run: () => void): number => {
      const start = performance.now();
      for (let i = 0; i < iterations; i++) {
        run();
      }
      return performance.now() - start;
    };

    // Warm-up: results are discarded so neither variant is timed cold.
    measure(lexPlain);
    measure(lexWithTrivia);

    // Interleave the variants so background noise affects both alike, and
    // keep the minimum because it is the least disturbed sample.
    let timeWithout = Infinity;
    let timeWith = Infinity;
    for (let round = 0; round < rounds; round++) {
      timeWithout = Math.min(timeWithout, measure(lexPlain));
      timeWith = Math.min(timeWith, measure(lexWithTrivia));
    }

    // Should be less than 10% performance impact
    const overhead = ((timeWith - timeWithout) / timeWithout) * 100;
    console.log(`Trivia overhead: ${overhead.toFixed(1)}%`);
    expect(overhead).toBeLessThan(10);
  });
});
|
|
549
|
+
});
|