@atomic-ehr/fhirpath 0.0.1-canary.0c6931e.20250727185306
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +473 -0
- package/dist/index.d.ts +462 -0
- package/dist/index.js +10307 -0
- package/dist/index.js.map +1 -0
- package/package.json +58 -0
- package/src/analyzer/analyzer.ts +499 -0
- package/src/analyzer/model-provider.ts +244 -0
- package/src/analyzer/schemas/index.ts +2 -0
- package/src/analyzer/schemas/types.ts +40 -0
- package/src/analyzer/types.ts +142 -0
- package/src/api/builder.ts +157 -0
- package/src/api/errors.ts +145 -0
- package/src/api/expression.ts +156 -0
- package/src/api/index.ts +122 -0
- package/src/api/inspect.ts +99 -0
- package/src/api/registry.ts +128 -0
- package/src/api/types.ts +210 -0
- package/src/compiler/compiler.ts +546 -0
- package/src/compiler/index.ts +2 -0
- package/src/compiler/prototype-context-adapter.ts +99 -0
- package/src/compiler/types.ts +24 -0
- package/src/index.ts +107 -0
- package/src/interpreter/README.md +78 -0
- package/src/interpreter/interpreter.ts +475 -0
- package/src/interpreter/types.ts +108 -0
- package/src/lexer/char-tables.ts +37 -0
- package/src/lexer/errors.ts +31 -0
- package/src/lexer/index.ts +5 -0
- package/src/lexer/lexer.ts +745 -0
- package/src/lexer/token.ts +104 -0
- package/src/lexer2/index.md +232 -0
- package/src/lexer2/index.perf.test.ts +68 -0
- package/src/lexer2/index.test.ts +549 -0
- package/src/lexer2/index.ts +1251 -0
- package/src/lexer2/notes.md +173 -0
- package/src/lexer2/optimization-summary.md +718 -0
- package/src/parser/ast-factory.ts +220 -0
- package/src/parser/ast.ts +144 -0
- package/src/parser/collection-parser.ts +89 -0
- package/src/parser/diagnostic-messages.ts +216 -0
- package/src/parser/diagnostics.ts +85 -0
- package/src/parser/error-reporter.ts +230 -0
- package/src/parser/index.ts +3 -0
- package/src/parser/literal-parser.ts +103 -0
- package/src/parser/parse-error.ts +16 -0
- package/src/parser/parser-error-factory.ts +141 -0
- package/src/parser/parser-state.ts +134 -0
- package/src/parser/parser.ts +1272 -0
- package/src/parser/pprint.ts +169 -0
- package/src/parser/precedence-manager.ts +64 -0
- package/src/parser/source-mapper.ts +248 -0
- package/src/parser/special-constructs.ts +142 -0
- package/src/parser/token-navigator.ts +110 -0
- package/src/parser/types.ts +60 -0
- package/src/parser2/index.md +177 -0
- package/src/parser2/index.perf.test.ts +184 -0
- package/src/parser2/index.test.ts +305 -0
- package/src/parser2/index.ts +578 -0
- package/src/parser2/optimization-summary.md +176 -0
- package/src/registry/default-analyzers.ts +257 -0
- package/src/registry/default-compilers.ts +31 -0
- package/src/registry/index.ts +96 -0
- package/src/registry/operations/arithmetic.ts +506 -0
- package/src/registry/operations/collection.ts +425 -0
- package/src/registry/operations/comparison.ts +432 -0
- package/src/registry/operations/existence.ts +703 -0
- package/src/registry/operations/filtering.ts +358 -0
- package/src/registry/operations/literals.ts +341 -0
- package/src/registry/operations/logical.ts +439 -0
- package/src/registry/operations/math.ts +128 -0
- package/src/registry/operations/membership.ts +132 -0
- package/src/registry/operations/navigation.ts +52 -0
- package/src/registry/operations/string.ts +507 -0
- package/src/registry/operations/subsetting.ts +174 -0
- package/src/registry/operations/type-checking.ts +162 -0
- package/src/registry/operations/type-conversion.ts +404 -0
- package/src/registry/operations/type-operators.ts +308 -0
- package/src/registry/operations/utility.ts +644 -0
- package/src/registry/registry.ts +146 -0
- package/src/registry/types.ts +161 -0
- package/src/registry/utils/evaluation-helpers.ts +93 -0
- package/src/registry/utils/index.ts +3 -0
- package/src/registry/utils/type-system.ts +173 -0
- package/src/runtime/context.ts +158 -0
- package/src/runtime/debug-context.ts +135 -0
|
@@ -0,0 +1,1251 @@
|
|
|
1
|
+
/**
 * Numeric token kinds produced by the lexer.
 *
 * Operator tokens pack their binding precedence into the high byte:
 * `value = (precedence << 8) | ordinal`. Every other token keeps the high
 * byte at zero, so `value >> 8` is 0 for tokens without a precedence.
 */
export enum TokenType {
  // ---- Tokens without precedence (high byte is zero) ----

  // Literals
  NULL = 1,
  BOOLEAN = 2,
  STRING = 3,
  NUMBER = 4,
  DATETIME = 5,
  TIME = 6,

  // Identifiers
  IDENTIFIER = 7,
  DELIMITED_IDENTIFIER = 8,

  // Boolean keywords
  TRUE = 9,
  FALSE = 10,

  // Special identifiers
  THIS = 11,
  INDEX = 12,
  TOTAL = 13,

  // Environment variable
  ENV_VAR = 14,

  // Date/time units (singular, then plural)
  YEAR = 15,
  MONTH = 16,
  WEEK = 17,
  DAY = 18,
  HOUR = 19,
  MINUTE = 20,
  SECOND = 21,
  MILLISECOND = 22,
  YEARS = 23,
  MONTHS = 24,
  WEEKS = 25,
  DAYS = 26,
  HOURS = 27,
  MINUTES = 28,
  SECONDS = 29,
  MILLISECONDS = 30,

  // Special
  EOF = 31,
  WHITESPACE = 32,
  COMMENT = 33,
  LINE_COMMENT = 34,

  // ---- Operators: (precedence << 8) | ordinal ----

  // Precedence 5
  PIPE = (5 << 8) | 1,

  // Precedence 10
  IMPLIES = (10 << 8) | 1,

  // Precedence 20
  OR = (20 << 8) | 1,
  XOR = (20 << 8) | 2,

  // Precedence 30
  AND = (30 << 8) | 1,

  // Precedence 35
  IN = (35 << 8) | 1,
  CONTAINS = (35 << 8) | 2,

  // Precedence 40
  EQ = (40 << 8) | 1,
  NEQ = (40 << 8) | 2,
  SIMILAR = (40 << 8) | 3,
  NOT_SIMILAR = (40 << 8) | 4,

  // Precedence 50
  LT = (50 << 8) | 1,
  GT = (50 << 8) | 2,
  LTE = (50 << 8) | 3,
  GTE = (50 << 8) | 4,

  // Precedence 60
  AMPERSAND = (60 << 8) | 1,

  // Precedence 70
  PLUS = (70 << 8) | 1,
  MINUS = (70 << 8) | 2,

  // Precedence 80
  MULTIPLY = (80 << 8) | 1,
  DIVIDE = (80 << 8) | 2,
  DIV = (80 << 8) | 3,
  MOD = (80 << 8) | 4,

  // Precedence 90
  AS = (90 << 8) | 1,
  IS = (90 << 8) | 2,

  // Precedence 100
  DOT = (100 << 8) | 1,
  LBRACKET = (100 << 8) | 2,
  LPAREN = (100 << 8) | 3,

  // ---- Punctuation without precedence (high byte is zero) ----
  RPAREN = 35,
  RBRACKET = 36,
  LBRACE = 37,
  RBRACE = 38,
  COMMA = 39,
  PERCENT = 40,
  AT = 41,
}
|
|
112
|
+
|
|
113
|
+
// Helper to convert numeric token type to string for debugging
|
|
114
|
+
// Debug lookup from numeric token type to its enum member name.
// A numeric enum object carries both name -> value and value -> name entries;
// only the name -> value pairs are used here, so every member of TokenType
// gets exactly one entry keyed by its numeric value.
const TOKEN_TYPE_NAMES: { [key: number]: string } = {};
for (const [memberName, memberValue] of Object.entries(TokenType)) {
  if (typeof memberValue === 'number') {
    TOKEN_TYPE_NAMES[memberValue] = memberName;
  }
}
|
|
184
|
+
|
|
185
|
+
/**
 * Returns a readable name for a token type, for debugging output.
 *
 * @param type - Numeric token type to describe.
 * @returns The registered name, or `UNKNOWN(<value>)` when the value has no
 *          entry in the name table.
 */
export function tokenTypeToString(type: TokenType): string {
  const registeredName = TOKEN_TYPE_NAMES[type];
  if (registeredName) {
    return registeredName;
  }
  return `UNKNOWN(${type})`;
}
|
|
188
|
+
|
|
189
|
+
/**
 * Channel tag that can be attached to a token.
 * The lexer marks whitespace and comment tokens as HIDDEN when the
 * `preserveTrivia` option is enabled.
 */
export enum Channel {
  /** Ordinary tokens. */
  REGULAR = 0,
  /** Trivia (whitespace / comments) kept alongside the regular tokens. */
  HIDDEN = 1,
}
|
|
193
|
+
|
|
194
|
+
/**
 * A lexed token. It stores offsets into the input rather than the text
 * itself; the lexeme is `input.slice(start, end)`.
 */
export interface Token {
  /** Kind of token. */
  type: TokenType;
  /** Offset of the first character of the token in the input. */
  start: number;
  /** Offset just past the last character of the token (exclusive). */
  end: number;
  /** Line on which the token starts; the lexer counts lines from 1. */
  line: number;
  /** Column at which the token starts; the lexer counts columns from 1. */
  column: number;
  /** Set to `Channel.HIDDEN` on trivia tokens when trivia is preserved. */
  channel?: Channel;
}
|
|
202
|
+
|
|
203
|
+
/**
 * Options accepted by the `Lexer` constructor. Every field is optional; the
 * constructor substitutes a default for any field left unset.
 */
export interface LexerOptions {
  /** Defaults to `true`. Presumably drops whitespace tokens from the output — confirm against the tokenizing loop. */
  skipWhitespace?: boolean;
  /** Defaults to `true`. Presumably drops comment tokens from the output — confirm against the tokenizing loop. */
  skipComments?: boolean;
  /** Defaults to `false`. When true, whitespace/comment tokens get `Channel.HIDDEN`. */
  preserveTrivia?: boolean;
}
|
|
208
|
+
|
|
209
|
+
// Character code constants
|
|
210
|
+
const CHAR_0 = 48; // '0'
const CHAR_9 = 57; // '9'
const CHAR_A = 65; // 'A'
const CHAR_F = 70; // 'F'
const CHAR_Z = 90; // 'Z'
const CHAR_UNDERSCORE = 95; // '_'
const CHAR_a = 97; // 'a'
const CHAR_f = 102; // 'f'
const CHAR_z = 122; // 'z'

// Classification tables indexed by character code (0..255).
// An entry is 1 when the character belongs to the class and 0 otherwise;
// indexing with a code above 255 yields undefined, which callers treat as
// "not in the class".
const IS_DIGIT = new Uint8Array(256);
const IS_LETTER = new Uint8Array(256);
const IS_LETTER_OR_DIGIT = new Uint8Array(256);
const IS_HEX_DIGIT = new Uint8Array(256);

// '0'..'9' are digits, identifier characters and hex digits.
for (let code = CHAR_0; code <= CHAR_9; code++) {
  IS_DIGIT[code] = 1;
  IS_LETTER_OR_DIGIT[code] = 1;
  IS_HEX_DIGIT[code] = 1;
}

// 'A'..'Z' and 'a'..'z' are letters.
for (let upper = CHAR_A, lower = CHAR_a; upper <= CHAR_Z; upper++, lower++) {
  IS_LETTER[upper] = 1;
  IS_LETTER[lower] = 1;
  IS_LETTER_OR_DIGIT[upper] = 1;
  IS_LETTER_OR_DIGIT[lower] = 1;
}

// The underscore is classified as a letter.
IS_LETTER[CHAR_UNDERSCORE] = 1;
IS_LETTER_OR_DIGIT[CHAR_UNDERSCORE] = 1;

// 'A'..'F' and 'a'..'f' are the alphabetic hex digits.
for (let upper = CHAR_A, lower = CHAR_a; upper <= CHAR_F; upper++, lower++) {
  IS_HEX_DIGIT[upper] = 1;
  IS_HEX_DIGIT[lower] = 1;
}
|
|
241
|
+
|
|
242
|
+
export class Lexer {
|
|
243
|
+
private input: string;
|
|
244
|
+
private position: number = 0;
|
|
245
|
+
private line: number = 1;
|
|
246
|
+
private column: number = 1;
|
|
247
|
+
private options: LexerOptions;
|
|
248
|
+
|
|
249
|
+
/**
 * Creates a lexer over `input`.
 *
 * @param input - Source text to tokenize.
 * @param options - Optional behaviour switches. Unset (null/undefined) fields
 *                  default to skipWhitespace = true, skipComments = true and
 *                  preserveTrivia = false.
 */
constructor(input: string, options: LexerOptions = {}) {
  const skipWhitespace = options.skipWhitespace ?? true;
  const skipComments = options.skipComments ?? true;
  const preserveTrivia = options.preserveTrivia ?? false;

  this.input = input;
  this.options = { skipWhitespace, skipComments, preserveTrivia };
}
|
|
257
|
+
|
|
258
|
+
/**
 * Looks at a character without consuming it.
 *
 * @param offset - Distance from the current position (0 = current character).
 * @returns The one-character string at that position, or `''` when there is
 *          no character there.
 */
private peek(offset: number = 0): string {
  const index = this.position + offset;
  return index < this.input.length ? this.input[index] || '' : '';
}
|
|
265
|
+
|
|
266
|
+
/**
 * Looks at a character code without consuming it.
 *
 * @param offset - Distance from the current position (0 = current character).
 * @returns The UTF-16 code unit at that position, or `-1` when the position
 *          is at or past the end of the input.
 */
private peekCharCode(offset: number = 0): number {
  const index = this.position + offset;
  return index >= this.input.length ? -1 : this.input.charCodeAt(index);
}
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
/**
 * Raises a lexing error for a character that cannot start any token.
 *
 * @param char - The offending character, used in the message.
 * @throws Error always; the message includes the current position.
 */
private throwUnexpectedChar(char: string): never {
  const message = `Unexpected character '${char}' at position ${this.position}`;
  throw new Error(message);
}
|
|
278
|
+
|
|
279
|
+
/**
 * Same as `throwUnexpectedChar`, but takes the character as a code unit.
 *
 * @param charCode - UTF-16 code unit of the offending character.
 * @throws Error always; the message includes the current position.
 */
private throwUnexpectedCharCode(charCode: number): never {
  const offending = String.fromCharCode(charCode);
  throw new Error(`Unexpected character '${offending}' at position ${this.position}`);
}
|
|
282
|
+
|
|
283
|
+
/**
 * Consumes one character and keeps the line/column counters in sync.
 *
 * @returns The consumed character, or `''` when already at end of input
 *          (in which case nothing changes).
 */
private advance(): string {
  if (this.position >= this.input.length) {
    return '';
  }

  const consumed = this.input[this.position] || '';
  this.position += 1;

  // A line feed starts a new line; anything else moves one column right.
  if (consumed === '\n') {
    this.line += 1;
    this.column = 1;
  } else {
    this.column += 1;
  }

  return consumed;
}
|
|
300
|
+
|
|
301
|
+
/**
 * Consumes a run of spaces, tabs, carriage returns and line feeds.
 *
 * @returns A WHITESPACE token covering the run (tagged `Channel.HIDDEN` when
 *          `preserveTrivia` is on), or `null` when the current character is
 *          not whitespace.
 */
private readWhitespace(): Token | null {
  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;
  const source = this.input;

  while (this.position < source.length) {
    const code = source.charCodeAt(this.position);

    if (code === 32 || code === 9) {
      // space or tab
      this.position++;
      this.column++;
    } else if (code === 13) {
      // carriage return: consumed, but the column is deliberately left alone
      this.position++;
    } else if (code === 10) {
      // line feed
      this.position++;
      this.line++;
      this.column = 1;
    } else {
      break;
    }
  }

  if (this.position === start) {
    return null;
  }

  const token: Token = {
    type: TokenType.WHITESPACE,
    start,
    end: this.position,
    line: startLine,
    column: startColumn,
  };
  if (this.options.preserveTrivia) {
    token.channel = Channel.HIDDEN;
  }
  return token;
}
|
|
349
|
+
|
|
350
|
+
/**
 * Reads a block comment or a line comment at the current position.
 *
 * A line comment runs up to, but not including, the next line feed (or the
 * end of input). Both kinds are tagged `Channel.HIDDEN` when `preserveTrivia`
 * is on.
 *
 * @returns A COMMENT or LINE_COMMENT token, or `null` when the input at the
 *          current position does not start a comment.
 * @throws Error when a block comment is opened but never closed. The scan
 *         used to stop one character before the end of the input and return
 *         a truncated COMMENT token, leaving the final character to be lexed
 *         as a separate token.
 */
private readComment(): Token | null {
  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;

  if (this.peek() !== '/') {
    return null;
  }

  const marker = this.peek(1);
  let type: TokenType;

  if (marker === '*') {
    this.advance(); // /
    this.advance(); // *

    let closed = false;
    while (this.position < this.input.length) {
      if (this.peek() === '*' && this.peek(1) === '/') {
        this.advance(); // *
        this.advance(); // /
        closed = true;
        break;
      }
      this.advance();
    }

    if (!closed) {
      throw new Error(`Unterminated comment at position ${start}`);
    }
    type = TokenType.COMMENT;
  } else if (marker === '/') {
    this.advance(); // /
    this.advance(); // /

    // The terminating line feed is not part of the comment.
    while (this.position < this.input.length && this.peek() !== '\n') {
      this.advance();
    }
    type = TokenType.LINE_COMMENT;
  } else {
    return null;
  }

  const token: Token = { type, start, end: this.position, line: startLine, column: startColumn };
  if (this.options.preserveTrivia) {
    token.channel = Channel.HIDDEN;
  }
  return token;
}
|
|
392
|
+
|
|
393
|
+
/**
 * Reads a quoted string literal delimited by `'` or `"`.
 *
 * Escapes are validated but not decoded: the token only records offsets.
 * Accepted escapes are backslash followed by one of ` ' " \ / f n r t, or
 * `u` followed by exactly four hex digits. A raw line feed inside the string
 * is allowed and advances the line counter.
 *
 * @returns A STRING token including both quotes, or `null` when the current
 *          character is not a quote.
 * @throws Error on an invalid or truncated escape, or when the closing quote
 *         is missing.
 */
private readString(): Token | null {
  const source = this.input;
  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;

  if (start >= source.length) return null;

  const quote = source.charCodeAt(start);
  if (quote !== 39 && quote !== 34) {
    // neither ' nor "
    return null;
  }

  // opening quote
  this.position++;
  this.column++;

  while (this.position < source.length) {
    const code = source.charCodeAt(this.position);

    if (code === quote) {
      this.position++;
      this.column++;
      return { type: TokenType.STRING, start, end: this.position, line: startLine, column: startColumn };
    }

    if (code === 10) {
      // raw line feed inside the literal
      this.position++;
      this.line++;
      this.column = 1;
      continue;
    }

    // Ordinary character or the backslash that introduces an escape.
    this.position++;
    this.column++;
    if (code !== 92) {
      continue;
    }

    if (this.position >= source.length) {
      throw new Error(`Invalid escape sequence at position ${this.position}`);
    }
    const escapedCode = source.charCodeAt(this.position);

    const isSimpleEscape =
      escapedCode === 96 || // `
      escapedCode === 39 || // '
      escapedCode === 34 || // "
      escapedCode === 92 || // \
      escapedCode === 47 || // /
      escapedCode === 102 || // f
      escapedCode === 110 || // n
      escapedCode === 114 || // r
      escapedCode === 116; // t

    if (isSimpleEscape) {
      this.position++;
      this.column++;
    } else if (escapedCode === 117) {
      // 'u' followed by exactly four hex digits
      this.position++;
      this.column++;
      for (let digit = 0; digit < 4; digit++) {
        if (this.position >= source.length || !IS_HEX_DIGIT[source.charCodeAt(this.position)]) {
          throw new Error(`Invalid unicode escape at position ${this.position}`);
        }
        this.position++;
        this.column++;
      }
    } else {
      const escaped = String.fromCharCode(escapedCode);
      throw new Error(`Invalid escape sequence \\${escaped} at position ${this.position}`);
    }
  }

  throw new Error(`Unterminated string at position ${start}`);
}
|
|
481
|
+
|
|
482
|
+
/**
 * Reads a back-quoted (delimited) identifier.
 *
 * Inside the delimiters a backslash followed by a back-quote or another
 * backslash is consumed as a pair, so an escaped back-quote does not end the
 * identifier. A backslash before any other character is consumed alone.
 *
 * @returns A DELIMITED_IDENTIFIER token including both back-quotes, or `null`
 *          when the current character is not a back-quote.
 * @throws Error when the closing back-quote is missing.
 */
private readDelimitedIdentifier(): Token | null {
  if (this.peekCharCode() !== 96) {
    return null;
  }

  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;

  this.advance(); // opening `

  while (this.position < this.input.length) {
    const code = this.peekCharCode();
    this.advance(); // every branch below consumes the current character

    if (code === 96) {
      // closing `
      return { type: TokenType.DELIMITED_IDENTIFIER, start, end: this.position, line: startLine, column: startColumn };
    }

    if (code === 92) {
      // backslash: also swallow an escaped ` or \
      const next = this.peekCharCode();
      if (next === 96 || next === 92) {
        this.advance();
      }
    }
  }

  throw new Error(`Unterminated delimited identifier at position ${start}`);
}
|
|
513
|
+
|
|
514
|
+
/**
 * Reads an unsigned integer or decimal number.
 *
 * The fractional part is consumed only when the `.` is immediately followed
 * by a digit, so in `1.toString()` the dot is left for the next token.
 *
 * @returns A NUMBER token, or `null` when the current character is not a
 *          digit.
 */
private readNumber(): Token | null {
  const input = this.input;
  const length = input.length;
  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;

  if (start >= length || !IS_DIGIT[input.charCodeAt(start)]) {
    return null;
  }

  // Integer part.
  while (this.position < length && IS_DIGIT[input.charCodeAt(this.position)]) {
    this.position++;
    this.column++;
  }

  // Optional fraction: '.' followed by at least one digit.
  if (
    this.position + 1 < length &&
    input.charCodeAt(this.position) === 46 && // '.'
    IS_DIGIT[input.charCodeAt(this.position + 1)]
  ) {
    // Consume '.'. The column must advance with the position; it previously
    // did not, which shifted the reported column of every later token on the
    // same line by one.
    this.position++;
    this.column++;

    while (this.position < length && IS_DIGIT[input.charCodeAt(this.position)]) {
      this.position++;
      this.column++;
    }
  }

  return { type: TokenType.NUMBER, start, end: this.position, line: startLine, column: startColumn };
}
|
|
560
|
+
|
|
561
|
+
/**
 * Reads a date/time literal introduced by `@`.
 *
 * Accepted shapes:
 *  - `@T` + time                                  -> TIME token
 *  - `@YYYY`, `@YYYY-MM`, `@YYYY-MM-DD`, each optionally followed by `T`;
 *    after a full date the `T` may be followed by a time -> DATETIME token
 *  - an optional zone suffix on a DATETIME: `Z`, or `+`/`-` followed by `hh`
 *    and optionally `:mm`
 *
 * On failure the lexer state is restored to where it was on entry.
 *
 * Fixes over the previous version: optional parts are consumed only when they
 * are complete. A malformed time after `T` (e.g. `T12:`) is no longer left
 * half-consumed inside the token, and a `+`/`-` not followed by two digits
 * (or a `:` not followed by two minute digits) is no longer swallowed.
 *
 * @returns A TIME or DATETIME token, or `null` when the input at the current
 *          position is not a date/time literal.
 */
private readDateTime(): Token | null {
  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;

  if (this.peek() !== '@') {
    return null;
  }

  // True when the character `offset` places ahead is an ASCII digit.
  const isDigitAt = (offset: number): boolean => {
    const code = this.peekCharCode(offset);
    return code !== -1 && IS_DIGIT[code] === 1;
  };
  // Consumes `count` characters.
  const skip = (count: number): void => {
    for (let i = 0; i < count; i++) {
      this.advance();
    }
  };
  // Restores the entry state and reports "not a date/time literal".
  const rewind = (): null => {
    this.position = start;
    this.line = startLine;
    this.column = startColumn;
    return null;
  };

  this.advance(); // @

  // Time-only literal: @Thh[:mm[:ss[.fff]]]
  if (this.peek() === 'T') {
    this.advance(); // T
    if (this.readTimeFormat()) {
      return { type: TokenType.TIME, start, end: this.position, line: startLine, column: startColumn };
    }
    return rewind();
  }

  // Year: exactly four digits.
  for (let i = 0; i < 4; i++) {
    if (!isDigitAt(0)) {
      return rewind();
    }
    this.advance();
  }

  if (this.peek() === '-') {
    // Month: two digits are mandatory once the '-' is present.
    this.advance();
    if (!isDigitAt(0) || !isDigitAt(1)) {
      return rewind();
    }
    skip(2);

    if (this.peek() === '-') {
      // Day: two digits are mandatory once the '-' is present.
      this.advance();
      if (!isDigitAt(0) || !isDigitAt(1)) {
        return rewind();
      }
      skip(2);

      if (this.peek() === 'T') {
        this.advance();
        // The time after 'T' is optional. If it turns out to be malformed,
        // fall back to just after the 'T'. The time characters never include
        // a line feed, so only position and column need restoring.
        const timePosition = this.position;
        const timeColumn = this.column;
        if (!this.readTimeFormat()) {
          this.position = timePosition;
          this.column = timeColumn;
        }
      }
    } else if (this.peek() === 'T') {
      this.advance();
    }
  } else if (this.peek() === 'T') {
    this.advance();
  }

  // Optional zone suffix; checked by lookahead so nothing is consumed unless
  // the whole component is present.
  const zone = this.peek();
  if (zone === 'Z') {
    this.advance();
  } else if ((zone === '+' || zone === '-') && isDigitAt(1) && isDigitAt(2)) {
    skip(3); // sign + hh
    if (this.peek() === ':' && isDigitAt(1) && isDigitAt(2)) {
      skip(3); // ':' + mm
    }
  }

  return { type: TokenType.DATETIME, start, end: this.position, line: startLine, column: startColumn };
}
|
|
671
|
+
|
|
672
|
+
/**
 * Consumes a time of the form `hh[:mm[:ss[.fraction]]]` at the current
 * position.
 *
 * Once a separator (`:` or `.`) is present, the component after it is
 * mandatory; a dangling separator makes the whole time invalid.
 *
 * The method is all-or-nothing: when it returns `false` the position and
 * column are exactly what they were on entry. It used to return `false` after
 * having consumed part of the input, which corrupted the token of a caller
 * that ignores the result. The line counter needs no restoring because no
 * line feed is ever consumed here.
 *
 * @returns `true` when a time was consumed, `false` otherwise.
 */
private readTimeFormat(): boolean {
  const entryPosition = this.position;
  const entryColumn = this.column;

  // True when the character `offset` places ahead is an ASCII digit.
  const isDigitAt = (offset: number): boolean => {
    const code = this.peekCharCode(offset);
    return code !== -1 && IS_DIGIT[code] === 1;
  };
  // Undoes everything consumed so far and reports failure.
  const fail = (): boolean => {
    this.position = entryPosition;
    this.column = entryColumn;
    return false;
  };

  // hh
  if (!isDigitAt(0) || !isDigitAt(1)) {
    return false;
  }
  this.advance();
  this.advance();

  // Optional :mm
  if (this.peek() !== ':') {
    return true;
  }
  this.advance();
  if (!isDigitAt(0) || !isDigitAt(1)) {
    return fail();
  }
  this.advance();
  this.advance();

  // Optional :ss
  if (this.peek() !== ':') {
    return true;
  }
  this.advance();
  if (!isDigitAt(0) || !isDigitAt(1)) {
    return fail();
  }
  this.advance();
  this.advance();

  // Optional .fraction (one or more digits)
  if (this.peek() !== '.') {
    return true;
  }
  this.advance();
  if (!isDigitAt(0)) {
    return fail();
  }
  while (isDigitAt(0)) {
    this.advance();
  }

  return true;
}
|
|
728
|
+
|
|
729
|
+
private readIdentifierOrKeyword(): Token | null {
|
|
730
|
+
const start = this.position;
|
|
731
|
+
const startLine = this.line;
|
|
732
|
+
const startColumn = this.column;
|
|
733
|
+
|
|
734
|
+
// Inline first letter check
|
|
735
|
+
if (this.position >= this.input.length) return null;
|
|
736
|
+
const firstCharCode = this.input.charCodeAt(this.position);
|
|
737
|
+
if (!IS_LETTER[firstCharCode]) {
|
|
738
|
+
return null;
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
// Inline letter/digit reading loop with inlined advance
|
|
742
|
+
while (this.position < this.input.length) {
|
|
743
|
+
const charCode = this.input.charCodeAt(this.position);
|
|
744
|
+
if (IS_LETTER_OR_DIGIT[charCode]) {
|
|
745
|
+
this.position++;
|
|
746
|
+
this.column++;
|
|
747
|
+
} else {
|
|
748
|
+
break;
|
|
749
|
+
}
|
|
750
|
+
}
|
|
751
|
+
|
|
752
|
+
const length = this.position - start;
|
|
753
|
+
|
|
754
|
+
// Check for keywords directly from input buffer without substring
|
|
755
|
+
let type: TokenType = TokenType.IDENTIFIER;
|
|
756
|
+
const input = this.input;
|
|
757
|
+
|
|
758
|
+
switch (length) {
|
|
759
|
+
case 2:
|
|
760
|
+
const c0_2 = input.charCodeAt(start);
|
|
761
|
+
const c1_2 = input.charCodeAt(start + 1);
|
|
762
|
+
if (c0_2 === 97 && c1_2 === 115) type = TokenType.AS; // 'as'
|
|
763
|
+
else if (c0_2 === 105 && c1_2 === 110) type = TokenType.IN; // 'in'
|
|
764
|
+
else if (c0_2 === 105 && c1_2 === 115) type = TokenType.IS; // 'is'
|
|
765
|
+
else if (c0_2 === 111 && c1_2 === 114) type = TokenType.OR; // 'or'
|
|
766
|
+
break;
|
|
767
|
+
case 3:
|
|
768
|
+
const c0_3 = input.charCodeAt(start);
|
|
769
|
+
const c1_3 = input.charCodeAt(start + 1);
|
|
770
|
+
const c2_3 = input.charCodeAt(start + 2);
|
|
771
|
+
if (c0_3 === 100 && c1_3 === 105 && c2_3 === 118) type = TokenType.DIV; // 'div'
|
|
772
|
+
else if (c0_3 === 109 && c1_3 === 111 && c2_3 === 100) type = TokenType.MOD; // 'mod'
|
|
773
|
+
else if (c0_3 === 97 && c1_3 === 110 && c2_3 === 100) type = TokenType.AND; // 'and'
|
|
774
|
+
else if (c0_3 === 120 && c1_3 === 111 && c2_3 === 114) type = TokenType.XOR; // 'xor'
|
|
775
|
+
else if (c0_3 === 100 && c1_3 === 97 && c2_3 === 121) type = TokenType.DAY; // 'day'
|
|
776
|
+
break;
|
|
777
|
+
case 4:
|
|
778
|
+
const c0_4 = input.charCodeAt(start);
|
|
779
|
+
if (c0_4 === 116 && // 't'
|
|
780
|
+
input.charCodeAt(start + 1) === 114 && // 'r'
|
|
781
|
+
input.charCodeAt(start + 2) === 117 && // 'u'
|
|
782
|
+
input.charCodeAt(start + 3) === 101) { // 'e'
|
|
783
|
+
type = TokenType.TRUE;
|
|
784
|
+
} else if (c0_4 === 121 && // 'y'
|
|
785
|
+
input.charCodeAt(start + 1) === 101 && // 'e'
|
|
786
|
+
input.charCodeAt(start + 2) === 97 && // 'a'
|
|
787
|
+
input.charCodeAt(start + 3) === 114) { // 'r'
|
|
788
|
+
type = TokenType.YEAR;
|
|
789
|
+
} else if (c0_4 === 119 && // 'w'
|
|
790
|
+
input.charCodeAt(start + 1) === 101 && // 'e'
|
|
791
|
+
input.charCodeAt(start + 2) === 101 && // 'e'
|
|
792
|
+
input.charCodeAt(start + 3) === 107) { // 'k'
|
|
793
|
+
type = TokenType.WEEK;
|
|
794
|
+
} else if (c0_4 === 104 && // 'h'
|
|
795
|
+
input.charCodeAt(start + 1) === 111 && // 'o'
|
|
796
|
+
input.charCodeAt(start + 2) === 117 && // 'u'
|
|
797
|
+
input.charCodeAt(start + 3) === 114) { // 'r'
|
|
798
|
+
type = TokenType.HOUR;
|
|
799
|
+
} else if (c0_4 === 100 && // 'd'
|
|
800
|
+
input.charCodeAt(start + 1) === 97 && // 'a'
|
|
801
|
+
input.charCodeAt(start + 2) === 121 && // 'y'
|
|
802
|
+
input.charCodeAt(start + 3) === 115) { // 's'
|
|
803
|
+
type = TokenType.DAYS;
|
|
804
|
+
}
|
|
805
|
+
break;
|
|
806
|
+
case 5:
|
|
807
|
+
const c0_5 = input.charCodeAt(start);
|
|
808
|
+
if (c0_5 === 102 && // 'f'
|
|
809
|
+
input.charCodeAt(start + 1) === 97 && // 'a'
|
|
810
|
+
input.charCodeAt(start + 2) === 108 && // 'l'
|
|
811
|
+
input.charCodeAt(start + 3) === 115 && // 's'
|
|
812
|
+
input.charCodeAt(start + 4) === 101) { // 'e'
|
|
813
|
+
type = TokenType.FALSE;
|
|
814
|
+
} else if (c0_5 === 109 && // 'm'
|
|
815
|
+
input.charCodeAt(start + 1) === 111 && // 'o'
|
|
816
|
+
input.charCodeAt(start + 2) === 110 && // 'n'
|
|
817
|
+
input.charCodeAt(start + 3) === 116 && // 't'
|
|
818
|
+
input.charCodeAt(start + 4) === 104) { // 'h'
|
|
819
|
+
type = TokenType.MONTH;
|
|
820
|
+
} else if (c0_5 === 119 && // 'w'
|
|
821
|
+
input.charCodeAt(start + 1) === 101 && // 'e'
|
|
822
|
+
input.charCodeAt(start + 2) === 101 && // 'e'
|
|
823
|
+
input.charCodeAt(start + 3) === 107 && // 'k'
|
|
824
|
+
input.charCodeAt(start + 4) === 115) { // 's'
|
|
825
|
+
type = TokenType.WEEKS;
|
|
826
|
+
} else if (c0_5 === 121 && // 'y'
|
|
827
|
+
input.charCodeAt(start + 1) === 101 && // 'e'
|
|
828
|
+
input.charCodeAt(start + 2) === 97 && // 'a'
|
|
829
|
+
input.charCodeAt(start + 3) === 114 && // 'r'
|
|
830
|
+
input.charCodeAt(start + 4) === 115) { // 's'
|
|
831
|
+
type = TokenType.YEARS;
|
|
832
|
+
} else if (c0_5 === 104 && // 'h'
|
|
833
|
+
input.charCodeAt(start + 1) === 111 && // 'o'
|
|
834
|
+
input.charCodeAt(start + 2) === 117 && // 'u'
|
|
835
|
+
input.charCodeAt(start + 3) === 114 && // 'r'
|
|
836
|
+
input.charCodeAt(start + 4) === 115) { // 's'
|
|
837
|
+
type = TokenType.HOURS;
|
|
838
|
+
}
|
|
839
|
+
break;
|
|
840
|
+
default:
|
|
841
|
+
// For longer keywords, fall back to substring
|
|
842
|
+
const value = input.substring(start, this.position);
|
|
843
|
+
switch (length) {
|
|
844
|
+
case 6:
|
|
845
|
+
if (value === 'minute') type = TokenType.MINUTE;
|
|
846
|
+
else if (value === 'second') type = TokenType.SECOND;
|
|
847
|
+
else if (value === 'months') type = TokenType.MONTHS;
|
|
848
|
+
break;
|
|
849
|
+
case 7:
|
|
850
|
+
if (value === 'implies') type = TokenType.IMPLIES;
|
|
851
|
+
else if (value === 'minutes') type = TokenType.MINUTES;
|
|
852
|
+
else if (value === 'seconds') type = TokenType.SECONDS;
|
|
853
|
+
break;
|
|
854
|
+
case 8:
|
|
855
|
+
if (value === 'contains') type = TokenType.CONTAINS;
|
|
856
|
+
break;
|
|
857
|
+
case 11:
|
|
858
|
+
if (value === 'millisecond') type = TokenType.MILLISECOND;
|
|
859
|
+
break;
|
|
860
|
+
case 12:
|
|
861
|
+
if (value === 'milliseconds') type = TokenType.MILLISECONDS;
|
|
862
|
+
break;
|
|
863
|
+
}
|
|
864
|
+
break;
|
|
865
|
+
}
|
|
866
|
+
|
|
867
|
+
return { type, start, end: this.position, line: startLine, column: startColumn };
|
|
868
|
+
}
|
|
869
|
+
|
|
870
|
+
/**
 * Tries to read one of the special identifiers `$this`, `$index` or `$total`
 * at the current position.
 *
 * On a match, `position` and `column` are moved past the identifier and a
 * token covering it is returned. On a miss nothing is consumed.
 *
 * @returns The THIS / INDEX / TOTAL token, or `null` when the input at the
 *          current position is not one of the three special identifiers.
 */
private readSpecialIdentifier(): Token | null {
  const source = this.input;
  const start = this.position;

  // Every special identifier opens with '$' (char code 36).
  if (start >= source.length || source.charCodeAt(start) !== 36) {
    return null;
  }

  // Match the word that follows the '$'; `width` includes the '$' itself.
  let type: TokenType;
  let width: number;
  if (source.startsWith('this', start + 1)) {
    type = TokenType.THIS;
    width = 5;
  } else if (source.startsWith('index', start + 1)) {
    type = TokenType.INDEX;
    width = 6;
  } else if (source.startsWith('total', start + 1)) {
    type = TokenType.TOTAL;
    width = 6;
  } else {
    return null;
  }

  const line = this.line;
  const column = this.column;
  this.position = start + width;
  this.column = column + width;
  return { type, start, end: this.position, line, column };
}
|
|
918
|
+
|
|
919
|
+
/**
 * Tries to read an environment variable reference starting at `%`.
 *
 * Three spellings are recognised:
 *  - `%'string'`   - quoted form; backslash escapes are validated
 *  - `` %`name` `` - backtick-delimited form
 *  - `%identifier` - bare form (ASCII only per spec)
 *
 * @returns An ENV_VAR token spanning the `%` and everything consumed after
 *          it, or `null` when the current character is not `%` or the `%` is
 *          not followed by a quote, a backtick or a letter. In the latter
 *          case position, line and column are rewound to where they were.
 * @throws Error when the quoted form contains an invalid escape or an
 *         invalid `\u` sequence, or when the quoted or delimited form is not
 *         terminated before the end of input.
 */
private readEnvVar(): Token | null {
  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;

  if (this.peekCharCode() !== 37) { // %
    return null;
  }
  this.advance(); // consume '%'

  const opener = this.peekCharCode();

  // Quoted form: %'string'
  if (opener === 39) { // '
    this.advance();

    while (this.position < this.input.length) {
      const code = this.peekCharCode();

      if (code === 39) { // closing '
        this.advance();
        return { type: TokenType.ENV_VAR, start, end: this.position, line: startLine, column: startColumn };
      }

      if (code !== 92) { // ordinary character
        this.advance();
        continue;
      }

      // Backslash escape: consume the '\' and validate what follows it.
      this.advance();
      const escapedCode = this.peekCharCode();

      if (escapedCode === 117) { // u - must be followed by exactly four hex digits
        this.advance();
        for (let remaining = 4; remaining > 0; remaining--) {
          const hexCode = this.peekCharCode();
          if (hexCode === -1 || !IS_HEX_DIGIT[hexCode]) {
            throw new Error(`Invalid unicode escape in environment variable at position ${this.position}`);
          }
          this.advance();
        }
      } else if (
        escapedCode === 39 || // '
        escapedCode === 92 || // \
        escapedCode === 47 || // /
        escapedCode === 102 || // f
        escapedCode === 110 || // n
        escapedCode === 114 || // r
        escapedCode === 116 // t
      ) {
        this.advance();
      } else {
        const escaped = escapedCode === -1 ? '' : String.fromCharCode(escapedCode);
        throw new Error(`Invalid escape sequence \\${escaped} in environment variable at position ${this.position}`);
      }
    }

    throw new Error(`Unterminated environment variable string at position ${start}`);
  }

  // Delimited form: %`delimited`
  if (opener === 96) { // `
    this.advance();

    while (this.position < this.input.length) {
      const code = this.peekCharCode();

      if (code === 96) { // closing `
        this.advance();
        return { type: TokenType.ENV_VAR, start, end: this.position, line: startLine, column: startColumn };
      }

      this.advance();
      if (code === 92) { // \
        // Only an escaped backtick or backslash is swallowed together with
        // the '\'; anything else is handled by the next loop iteration.
        const escapedCode = this.peekCharCode();
        if (escapedCode === 96 || escapedCode === 92) {
          this.advance();
        }
      }
    }

    throw new Error(`Unterminated environment variable delimiter at position ${start}`);
  }

  // Identifier form: %identifier (ASCII only per spec)
  if (opener === -1 || !IS_LETTER[opener]) {
    // Just a percent sign, not an env var: undo the advance past '%'.
    this.position = start;
    this.line = startLine;
    this.column = startColumn;
    return null;
  }

  while (this.position < this.input.length) {
    const code = this.peekCharCode();
    if (code === -1 || !IS_LETTER_OR_DIGIT[code]) {
      break;
    }
    this.advance();
  }

  return { type: TokenType.ENV_VAR, start, end: this.position, line: startLine, column: startColumn };
}
|
|
1029
|
+
|
|
1030
|
+
/**
 * Reads and returns the next token from the input.
 *
 * Leading whitespace and comments are consumed first. A whitespace token is
 * returned instead of skipped when `options.preserveTrivia` is set or
 * `options.skipWhitespace` is not; comments follow the same rule with
 * `options.skipComments`.
 *
 * @returns The next token, or an EOF token once the input is exhausted.
 * @throws Error when a character cannot start any token, or when `!` is not
 *         followed by `=` or `~`.
 */
public nextToken(): Token {
  // Consume leading trivia, handing it back when the options ask for it.
  while (this.position < this.input.length) {
    const whitespace = this.readWhitespace();
    if (whitespace) {
      if (this.options.preserveTrivia || !this.options.skipWhitespace) return whitespace;
      continue;
    }

    const comment = this.readComment();
    if (!comment) {
      break;
    }
    if (this.options.preserveTrivia || !this.options.skipComments) return comment;
  }

  if (this.position >= this.input.length) {
    return { type: TokenType.EOF, start: this.position, end: this.position, line: this.line, column: this.column };
  }

  const start = this.position;
  const startLine = this.line;
  const startColumn = this.column;
  const firstCharCode = this.peekCharCode();

  // Dispatch on the first character code. Cases that delegate to a reader
  // return directly; operator cases consume their characters, choose `type`
  // and fall through to the single token construction at the bottom.
  let type: TokenType;
  switch (firstCharCode) {
    // String literals
    case 39: // '
    case 34: // "
      return this.readString() || this.throwUnexpectedChar(String.fromCharCode(firstCharCode));

    // Delimited identifier
    case 96: // `
      return this.readDelimitedIdentifier() || this.throwUnexpectedChar(String.fromCharCode(firstCharCode));

    // Special identifiers
    case 36: // $
      return this.readSpecialIdentifier() || this.throwUnexpectedChar(String.fromCharCode(firstCharCode));

    // DateTime/Time literal, otherwise the AT operator
    case 64: { // @
      const dateTime = this.readDateTime();
      if (dateTime) return dateTime;
      this.advance();
      type = TokenType.AT;
      break;
    }

    // Environment variable, otherwise the percent operator
    case 37: { // %
      const envVar = this.readEnvVar();
      if (envVar) return envVar;
      this.position++;
      this.column++;
      type = TokenType.PERCENT;
      break;
    }

    // Single-character operators
    case 46: // .
      this.position++;
      this.column++;
      type = TokenType.DOT;
      break;
    case 40: // (
      this.position++;
      this.column++;
      type = TokenType.LPAREN;
      break;
    case 41: // )
      this.position++;
      this.column++;
      type = TokenType.RPAREN;
      break;
    case 91: // [
      this.advance();
      type = TokenType.LBRACKET;
      break;
    case 93: // ]
      this.advance();
      type = TokenType.RBRACKET;
      break;
    case 123: // {
      this.advance();
      type = TokenType.LBRACE;
      break;
    case 125: // }
      this.advance();
      type = TokenType.RBRACE;
      break;
    case 43: // +
      this.position++;
      this.column++;
      type = TokenType.PLUS;
      break;
    case 45: // -
      this.position++;
      this.column++;
      type = TokenType.MINUS;
      break;
    case 42: // *
      this.position++;
      this.column++;
      type = TokenType.MULTIPLY;
      break;
    case 47: // /
      this.position++;
      this.column++;
      type = TokenType.DIVIDE;
      break;
    case 38: // &
      this.position++;
      this.column++;
      type = TokenType.AMPERSAND;
      break;
    case 124: // |
      this.position++;
      this.column++;
      type = TokenType.PIPE;
      break;
    case 126: // ~
      this.advance();
      type = TokenType.SIMILAR;
      break;
    case 44: // ,
      this.advance();
      type = TokenType.COMMA;
      break;
    case 61: // =
      this.position++;
      this.column++;
      type = TokenType.EQ;
      break;

    // '<' or '<='
    case 60: // <
      this.position++;
      this.column++;
      if (this.peekCharCode() === 61) { // =
        this.position++;
        this.column++;
        type = TokenType.LTE;
      } else {
        type = TokenType.LT;
      }
      break;

    // '>' or '>='
    case 62: // >
      this.position++;
      this.column++;
      if (this.peekCharCode() === 61) { // =
        this.position++;
        this.column++;
        type = TokenType.GTE;
      } else {
        type = TokenType.GT;
      }
      break;

    // '!=' or '!~'; a lone '!' is not a token
    case 33: { // !
      this.position++;
      this.column++;
      const follower = this.peekCharCode();
      if (follower === 61) { // =
        type = TokenType.NEQ;
      } else if (follower === 126) { // ~
        type = TokenType.NOT_SIMILAR;
      } else {
        throw new Error(`Unexpected character '!' at position ${this.position - 1}`);
      }
      this.position++;
      this.column++;
      break;
    }

    // EOF
    case -1:
      return { type: TokenType.EOF, start: this.position, end: this.position, line: this.line, column: this.column };

    default:
      // Numbers start with a digit (0-9)
      if (IS_DIGIT[firstCharCode]) {
        return this.readNumber() || this.throwUnexpectedChar(String.fromCharCode(firstCharCode));
      }

      // Identifiers and keywords start with a letter (A-Z, a-z, _)
      if (IS_LETTER[firstCharCode]) {
        return this.readIdentifierOrKeyword() || this.throwUnexpectedChar(String.fromCharCode(firstCharCode));
      }

      // Nothing can start with this character
      throw new Error(`Unexpected character '${String.fromCharCode(firstCharCode)}' at position ${this.position}`);
  }

  return { type, start, end: this.position, line: startLine, column: startColumn };
}
|
|
1207
|
+
|
|
1208
|
+
/**
 * Reads tokens from the current position until EOF.
 *
 * @returns Every token produced by `nextToken()`, in order; the final
 *          element is always the EOF token.
 */
public tokenize(): Token[] {
  const collected: Token[] = [];

  let current: Token;
  do {
    current = this.nextToken();
    collected.push(current);
  } while (current.type !== TokenType.EOF);

  return collected;
}
|
|
1221
|
+
|
|
1222
|
+
/**
 * Returns the source text a token covers.
 *
 * @param token Token whose `start`/`end` offsets index into this lexer's input.
 * @returns The input text between `token.start` and `token.end`.
 */
public getTokenValue(token: Token): string {
  const { start, end } = token;
  return this.input.substring(start, end);
}
|
|
1225
|
+
|
|
1226
|
+
// Debug helper to print tokens in human-readable format
|
|
1227
|
+
/**
 * Formats tokens as human-readable text, one `TYPE(value) [line:column]`
 * entry per line.
 *
 * When `tokens` is omitted the whole input is re-tokenized from the
 * beginning. The lexer's position, line and column are saved first and
 * always restored afterwards, even if tokenization throws, so calling this
 * helper never disturbs an in-progress scan.
 *
 * @param tokens Tokens to format; defaults to tokenizing the entire input.
 * @returns The formatted entries joined with newlines.
 * @throws Error whatever `tokenize()` throws when the input cannot be lexed.
 */
public debugTokens(tokens?: Token[]): string {
  let list: Token[];

  if (tokens) {
    list = tokens;
  } else {
    // Save the current scan state and rewind to the start of the input.
    const savedPosition = this.position;
    const savedLine = this.line;
    const savedColumn = this.column;
    this.position = 0;
    this.line = 1;
    this.column = 1;

    try {
      list = this.tokenize();
    } finally {
      // Restore the scan state on success and on failure alike.
      this.position = savedPosition;
      this.line = savedLine;
      this.column = savedColumn;
    }
  }

  return list.map(token => {
    const value = this.getTokenValue(token);
    const type = tokenTypeToString(token.type);
    return `${type}(${value}) [${token.line}:${token.column}]`;
  }).join('\n');
}
|
|
1251
|
+
}
|