@quereus/quereus 0.6.12 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/dist/src/parser/lexer.d.ts +6 -0
  2. package/dist/src/parser/lexer.d.ts.map +1 -1
  3. package/dist/src/parser/lexer.js +33 -1
  4. package/dist/src/parser/lexer.js.map +1 -1
  5. package/dist/src/parser/parser.d.ts.map +1 -1
  6. package/dist/src/parser/parser.js +28 -24
  7. package/dist/src/parser/parser.js.map +1 -1
  8. package/dist/src/planner/building/select-aggregates.d.ts +6 -1
  9. package/dist/src/planner/building/select-aggregates.d.ts.map +1 -1
  10. package/dist/src/planner/building/select-aggregates.js +23 -4
  11. package/dist/src/planner/building/select-aggregates.js.map +1 -1
  12. package/dist/src/planner/building/select-modifiers.js +7 -2
  13. package/dist/src/planner/building/select-modifiers.js.map +1 -1
  14. package/dist/src/planner/building/select.d.ts.map +1 -1
  15. package/dist/src/planner/building/select.js +2 -2
  16. package/dist/src/planner/building/select.js.map +1 -1
  17. package/dist/src/planner/building/update.d.ts.map +1 -1
  18. package/dist/src/planner/building/update.js +8 -4
  19. package/dist/src/planner/building/update.js.map +1 -1
  20. package/dist/src/planner/nodes/join-node.d.ts.map +1 -1
  21. package/dist/src/planner/nodes/join-node.js +6 -1
  22. package/dist/src/planner/nodes/join-node.js.map +1 -1
  23. package/dist/src/planner/rules/access/rule-select-access-path.js +15 -2
  24. package/dist/src/planner/rules/access/rule-select-access-path.js.map +1 -1
  25. package/dist/src/schema/manager.d.ts +30 -0
  26. package/dist/src/schema/manager.d.ts.map +1 -1
  27. package/dist/src/schema/manager.js +205 -0
  28. package/dist/src/schema/manager.js.map +1 -1
  29. package/dist/src/vtab/best-access-plan.d.ts +2 -0
  30. package/dist/src/vtab/best-access-plan.d.ts.map +1 -1
  31. package/dist/src/vtab/best-access-plan.js.map +1 -1
  32. package/dist/src/vtab/memory/layer/scan-plan.js +2 -2
  33. package/dist/src/vtab/memory/layer/scan-plan.js.map +1 -1
  34. package/dist/src/vtab/memory/module.d.ts +1 -1
  35. package/dist/src/vtab/memory/module.d.ts.map +1 -1
  36. package/dist/src/vtab/memory/module.js +2 -1
  37. package/dist/src/vtab/memory/module.js.map +1 -1
  38. package/dist/src/vtab/module.d.ts +2 -1
  39. package/dist/src/vtab/module.d.ts.map +1 -1
  40. package/package.json +1 -1
  41. package/src/parser/lexer.ts +806 -771
  42. package/src/parser/parser.ts +3352 -3347
  43. package/src/planner/building/select-aggregates.ts +30 -5
  44. package/src/planner/building/select-modifiers.ts +8 -2
  45. package/src/planner/building/select.ts +567 -560
  46. package/src/planner/building/update.ts +9 -5
  47. package/src/planner/nodes/join-node.ts +6 -1
  48. package/src/planner/rules/access/rule-select-access-path.ts +399 -384
  49. package/src/schema/manager.ts +235 -1
  50. package/src/vtab/best-access-plan.ts +2 -0
  51. package/src/vtab/memory/layer/scan-plan.ts +2 -2
  52. package/src/vtab/memory/module.ts +2 -1
  53. package/src/vtab/module.ts +162 -160
@@ -1,771 +1,806 @@
1
- export enum TokenType {
2
- // Literals
3
- INTEGER = 'INTEGER',
4
- FLOAT = 'FLOAT',
5
- STRING = 'STRING',
6
- IDENTIFIER = 'IDENTIFIER',
7
- BLOB = 'BLOB',
8
-
9
- // Keywords
10
- SELECT = 'SELECT',
11
- FROM = 'FROM',
12
- WHERE = 'WHERE',
13
- INSERT = 'INSERT',
14
- UPDATE = 'UPDATE',
15
- DELETE = 'DELETE',
16
- CREATE = 'CREATE',
17
- DROP = 'DROP',
18
- ALTER = 'ALTER',
19
- TABLE = 'TABLE',
20
- INDEX = 'INDEX',
21
- VIEW = 'VIEW',
22
- ASSERTION = 'ASSERTION',
23
- TEMP = 'TEMP',
24
- TEMPORARY = 'TEMPORARY',
25
- VIRTUAL = 'VIRTUAL',
26
- USING = 'USING',
27
- INTO = 'INTO',
28
- NULL = 'NULL',
29
- TRUE = 'TRUE',
30
- FALSE = 'FALSE',
31
- NOT = 'NOT',
32
- AND = 'AND',
33
- OR = 'OR',
34
- IN = 'IN',
35
- LIKE = 'LIKE',
36
- BETWEEN = 'BETWEEN',
37
- IS = 'IS',
38
- AS = 'AS',
39
- DISTINCT = 'DISTINCT',
40
- GROUP = 'GROUP',
41
- BY = 'BY',
42
- HAVING = 'HAVING',
43
- ORDER = 'ORDER',
44
- ASC = 'ASC',
45
- DESC = 'DESC',
46
- LIMIT = 'LIMIT',
47
- OFFSET = 'OFFSET',
48
- UNION = 'UNION',
49
- INTERSECT = 'INTERSECT',
50
- EXCEPT = 'EXCEPT',
51
- DIFF = 'DIFF',
52
- ALL = 'ALL',
53
- PRIMARY = 'PRIMARY',
54
- CONSTRAINT = 'CONSTRAINT',
55
- GENERATED = 'GENERATED',
56
- COLLATE = 'COLLATE',
57
- KEY = 'KEY',
58
- UNIQUE = 'UNIQUE',
59
- DEFAULT = 'DEFAULT',
60
- CHECK = 'CHECK',
61
- FOREIGN = 'FOREIGN',
62
- REFERENCES = 'REFERENCES',
63
- AUTOINCREMENT = 'AUTOINCREMENT',
64
- ON = 'ON',
65
- CONFLICT = 'CONFLICT',
66
- CASCADE = 'CASCADE',
67
- RESTRICT = 'RESTRICT',
68
- SET = 'SET',
69
- NO = 'NO',
70
- ACTION = 'ACTION',
71
- RENAME = 'RENAME',
72
- COLUMN = 'COLUMN',
73
- TO = 'TO',
74
- ADD = 'ADD',
75
- ALWAYS = 'ALWAYS',
76
- ABORT = 'ABORT',
77
- FAIL = 'FAIL',
78
- IGNORE = 'IGNORE',
79
- BEGIN = 'BEGIN',
80
- COMMIT = 'COMMIT',
81
- ROLLBACK = 'ROLLBACK',
82
- TRANSACTION = 'TRANSACTION',
83
- DEFERRED = 'DEFERRED',
84
- IMMEDIATE = 'IMMEDIATE',
85
- JOIN = 'JOIN',
86
- INNER = 'INNER',
87
- LEFT = 'LEFT',
88
- RIGHT = 'RIGHT',
89
- FULL = 'FULL',
90
- CROSS = 'CROSS',
91
- OUTER = 'OUTER',
92
- NATURAL = 'NATURAL',
93
- REPLACE = 'REPLACE',
94
- VALUES = 'VALUES',
95
- EXISTS = 'EXISTS',
96
- IF = 'IF',
97
- DEFERRABLE = 'DEFERRABLE',
98
- INITIALLY = 'INITIALLY',
99
- STORED = 'STORED',
100
- RETURNING = 'RETURNING',
101
- SAVEPOINT = 'SAVEPOINT',
102
- RELEASE = 'RELEASE',
103
- PRAGMA = 'PRAGMA',
104
- WITH = 'WITH',
105
- RECURSIVE = 'RECURSIVE',
106
- XOR = 'XOR',
107
- CASE = 'CASE',
108
- WHEN = 'WHEN',
109
- THEN = 'THEN',
110
- ELSE = 'ELSE',
111
- END = 'END',
112
- CAST = 'CAST',
113
- OVER = 'OVER',
114
- PARTITION = 'PARTITION',
115
- LATERAL = 'LATERAL',
116
- ROW = 'ROW',
117
- ROWS = 'ROWS',
118
- RANGE = 'RANGE',
119
- UNBOUNDED = 'UNBOUNDED',
120
- PRECEDING = 'PRECEDING',
121
- FOLLOWING = 'FOLLOWING',
122
- CURRENT = 'CURRENT',
123
-
124
- // Declarative schema
125
- DECLARE = 'DECLARE',
126
- SCHEMA = 'SCHEMA',
127
- APPLY = 'APPLY',
128
- EXPLAIN = 'EXPLAIN',
129
- VERSION = 'VERSION',
130
- SEED = 'SEED',
131
-
132
- // Operators and punctuation
133
- PLUS = 'PLUS', // +
134
- MINUS = 'MINUS', // -
135
- ASTERISK = 'ASTERISK', // *
136
- SLASH = 'SLASH', // /
137
- PERCENT = 'PERCENT', // %
138
- EQUAL = 'EQUAL', // =
139
- EQUAL_EQUAL = 'EQUAL_EQUAL', // == (SQLite allows both = and ==)
140
- NOT_EQUAL = 'NOT_EQUAL', // != or <>
141
- LESS = 'LESS', // <
142
- LESS_EQUAL = 'LESS_EQUAL', // <=
143
- GREATER = 'GREATER', // >
144
- GREATER_EQUAL = 'GREATER_EQUAL', // >=
145
- LPAREN = 'LPAREN', // (
146
- RPAREN = 'RPAREN', // )
147
- COMMA = 'COMMA', // ,
148
- DOT = 'DOT', // .
149
- SEMICOLON = 'SEMICOLON', // ;
150
- TILDE = 'TILDE', // ~ (for REGEXP)
151
- PIPE = 'PIPE', // | (for concatenation or UNION)
152
- PIPE_PIPE = 'PIPE_PIPE', // || (for concatenation)
153
- AMPERSAND = 'AMPERSAND', // &
154
- AMPERSAND_AMPERSAND = 'AMPERSAND_AMPERSAND', // &&
155
- QUESTION = 'QUESTION', // ? (for parameters)
156
- COLON = 'COLON', // : (for named parameters)
157
- DOLLAR = 'DOLLAR', // $ (for named parameters)
158
- ARROW = 'ARROW', // -> (JSON operator)
159
- LBRACE = 'LBRACE', // {
160
- RBRACE = 'RBRACE', // }
161
-
162
- // Special
163
- EOF = 'EOF',
164
- ERROR = 'ERROR'
165
- }
166
-
167
- // Token represents a lexical token from the SQL input
168
- export interface Token {
169
- type: TokenType;
170
- lexeme: string;
171
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
172
- literal?: any;
173
- startLine: number;
174
- startColumn: number;
175
- startOffset: number;
176
- endLine: number;
177
- endColumn: number;
178
- endOffset: number;
179
- }
180
-
181
- // Reserved keywords mapping
182
- export const KEYWORDS: Record<string, TokenType> = {
183
- 'select': TokenType.SELECT,
184
- 'from': TokenType.FROM,
185
- 'where': TokenType.WHERE,
186
- 'insert': TokenType.INSERT,
187
- 'update': TokenType.UPDATE,
188
- 'delete': TokenType.DELETE,
189
- 'create': TokenType.CREATE,
190
- 'drop': TokenType.DROP,
191
- 'alter': TokenType.ALTER,
192
- 'table': TokenType.TABLE,
193
- 'index': TokenType.INDEX,
194
- 'view': TokenType.VIEW,
195
- 'assertion': TokenType.ASSERTION,
196
- 'virtual': TokenType.VIRTUAL,
197
- 'using': TokenType.USING,
198
- 'null': TokenType.NULL,
199
- 'true': TokenType.TRUE,
200
- 'false': TokenType.FALSE,
201
- 'not': TokenType.NOT,
202
- 'and': TokenType.AND,
203
- 'or': TokenType.OR,
204
- 'in': TokenType.IN,
205
- 'like': TokenType.LIKE,
206
- 'between': TokenType.BETWEEN,
207
- 'is': TokenType.IS,
208
- 'as': TokenType.AS,
209
- 'distinct': TokenType.DISTINCT,
210
- 'group': TokenType.GROUP,
211
- 'by': TokenType.BY,
212
- 'having': TokenType.HAVING,
213
- 'order': TokenType.ORDER,
214
- 'asc': TokenType.ASC,
215
- 'desc': TokenType.DESC,
216
- 'limit': TokenType.LIMIT,
217
- 'offset': TokenType.OFFSET,
218
- 'union': TokenType.UNION,
219
- 'diff': TokenType.DIFF,
220
- 'all': TokenType.ALL,
221
- 'primary': TokenType.PRIMARY,
222
- 'constraint': TokenType.CONSTRAINT,
223
- 'key': TokenType.KEY,
224
- 'unique': TokenType.UNIQUE,
225
- 'default': TokenType.DEFAULT,
226
- 'check': TokenType.CHECK,
227
- 'collate': TokenType.COLLATE,
228
- 'foreign': TokenType.FOREIGN,
229
- 'references': TokenType.REFERENCES,
230
- 'on': TokenType.ON,
231
- 'conflict': TokenType.CONFLICT,
232
- 'cascade': TokenType.CASCADE,
233
- 'restrict': TokenType.RESTRICT,
234
- 'set': TokenType.SET,
235
- 'autoincrement': TokenType.AUTOINCREMENT,
236
- 'no': TokenType.NO,
237
- 'action': TokenType.ACTION,
238
- 'begin': TokenType.BEGIN,
239
- 'commit': TokenType.COMMIT,
240
- 'rollback': TokenType.ROLLBACK,
241
- 'transaction': TokenType.TRANSACTION,
242
- 'deferred': TokenType.DEFERRED,
243
- 'immediate': TokenType.IMMEDIATE,
244
- 'deferrable': TokenType.DEFERRABLE,
245
- 'initially': TokenType.INITIALLY,
246
- 'stored': TokenType.STORED,
247
- 'returning': TokenType.RETURNING,
248
- 'join': TokenType.JOIN,
249
- 'inner': TokenType.INNER,
250
- 'left': TokenType.LEFT,
251
- 'right': TokenType.RIGHT,
252
- 'full': TokenType.FULL,
253
- 'cross': TokenType.CROSS,
254
- 'outer': TokenType.OUTER,
255
- 'natural': TokenType.NATURAL,
256
- 'replace': TokenType.REPLACE,
257
- 'values': TokenType.VALUES,
258
- 'exists': TokenType.EXISTS,
259
- 'if': TokenType.IF,
260
- 'into': TokenType.INTO,
261
- 'temp': TokenType.TEMP,
262
- 'temporary': TokenType.TEMPORARY,
263
- 'rename': TokenType.RENAME,
264
- 'to': TokenType.TO,
265
- 'add': TokenType.ADD,
266
- 'always': TokenType.ALWAYS,
267
- 'abort': TokenType.ABORT,
268
- 'fail': TokenType.FAIL,
269
- 'ignore': TokenType.IGNORE,
270
- 'savepoint': TokenType.SAVEPOINT,
271
- 'release': TokenType.RELEASE,
272
- 'pragma': TokenType.PRAGMA,
273
- 'with': TokenType.WITH,
274
- 'recursive': TokenType.RECURSIVE,
275
- 'xor': TokenType.XOR,
276
- 'case': TokenType.CASE,
277
- 'when': TokenType.WHEN,
278
- 'then': TokenType.THEN,
279
- 'else': TokenType.ELSE,
280
- 'end': TokenType.END,
281
- 'cast': TokenType.CAST,
282
- 'over': TokenType.OVER,
283
- 'partition': TokenType.PARTITION,
284
- 'lateral': TokenType.LATERAL,
285
- 'row': TokenType.ROW,
286
- 'rows': TokenType.ROWS,
287
- 'range': TokenType.RANGE,
288
- 'unbounded': TokenType.UNBOUNDED,
289
- 'preceding': TokenType.PRECEDING,
290
- 'following': TokenType.FOLLOWING,
291
- 'current': TokenType.CURRENT,
292
- 'intersect': TokenType.INTERSECT,
293
- 'except': TokenType.EXCEPT,
294
- 'declare': TokenType.DECLARE,
295
- // Note: schema, version, seed deliberately NOT reserved here - treated as contextual keywords
296
- // to avoid breaking schema() function calls and column names like 'version', 'seed'
297
- 'apply': TokenType.APPLY,
298
- 'explain': TokenType.EXPLAIN,
299
- };
300
-
301
- /**
302
- * Lexer class for tokenizing SQL statements
303
- */
304
- export class Lexer {
305
- private source: string;
306
- private tokens: Token[] = [];
307
- private start = 0;
308
- private current = 0;
309
- private line = 1;
310
- private column = 1;
311
- private startLine = 1;
312
- private startColumn = 1;
313
-
314
- constructor(source: string) {
315
- this.source = source;
316
- }
317
-
318
- /**
319
- * Scans the input and returns all tokens.
320
- */
321
- scanTokens(): Token[] {
322
- while (!this.isAtEnd()) {
323
- this.start = this.current;
324
- this.startLine = this.line;
325
- this.startColumn = this.column;
326
- this.scanToken();
327
- }
328
-
329
- this.tokens.push({
330
- type: TokenType.EOF,
331
- lexeme: '',
332
- startLine: this.line,
333
- startColumn: this.column,
334
- startOffset: this.source.length,
335
- endLine: this.line,
336
- endColumn: this.column,
337
- endOffset: this.source.length,
338
- });
339
-
340
- return this.tokens;
341
- }
342
-
343
- private isAtEnd(): boolean {
344
- return this.current >= this.source.length;
345
- }
346
-
347
- private scanToken(): void {
348
- const c = this.advance();
349
-
350
- switch (c) {
351
- // Single-character tokens
352
- case '(': this.addToken(TokenType.LPAREN); break;
353
- case ')': this.addToken(TokenType.RPAREN); break;
354
- case '{': this.addToken(TokenType.LBRACE); break;
355
- case '}': this.addToken(TokenType.RBRACE); break;
356
- case ',': this.addToken(TokenType.COMMA); break;
357
- case '.': this.addToken(TokenType.DOT); break;
358
- case ';': this.addToken(TokenType.SEMICOLON); break;
359
- case '+': this.addToken(TokenType.PLUS); break;
360
- case '-':
361
- if (this.match('-')) {
362
- // SQL-style line comment
363
- while (this.peek() !== '\n' && !this.isAtEnd()) {
364
- this.advance();
365
- }
366
- } else if (this.match('>')) {
367
- this.addToken(TokenType.ARROW);
368
- } else {
369
- this.addToken(TokenType.MINUS);
370
- }
371
- break;
372
- case '*': this.addToken(TokenType.ASTERISK); break;
373
- case '/':
374
- if (this.match('/')) {
375
- // Single line comment
376
- while (this.peek() !== '\n' && !this.isAtEnd()) {
377
- this.advance();
378
- }
379
- } else if (this.match('*')) {
380
- // Multiline comment
381
- this.multilineComment();
382
- } else {
383
- this.addToken(TokenType.SLASH);
384
- }
385
- break;
386
- case '%': this.addToken(TokenType.PERCENT); break;
387
- case '~': this.addToken(TokenType.TILDE); break;
388
- case '?': this.addToken(TokenType.QUESTION); break;
389
- case ':': this.addToken(TokenType.COLON); break;
390
- case '$': this.addToken(TokenType.DOLLAR); break;
391
-
392
- // One or two character tokens
393
- case '=':
394
- this.addToken(this.match('=') ? TokenType.EQUAL_EQUAL : TokenType.EQUAL);
395
- break;
396
- case '!':
397
- this.addToken(this.match('=') ? TokenType.NOT_EQUAL : TokenType.ERROR);
398
- break;
399
- case '<':
400
- if (this.match('=')) {
401
- this.addToken(TokenType.LESS_EQUAL);
402
- } else if (this.match('>')) {
403
- this.addToken(TokenType.NOT_EQUAL);
404
- } else {
405
- this.addToken(TokenType.LESS);
406
- }
407
- break;
408
- case '>':
409
- this.addToken(this.match('=') ? TokenType.GREATER_EQUAL : TokenType.GREATER);
410
- break;
411
- case '|':
412
- this.addToken(this.match('|') ? TokenType.PIPE_PIPE : TokenType.PIPE);
413
- break;
414
- case '&':
415
- this.addToken(this.match('&') ? TokenType.AMPERSAND_AMPERSAND : TokenType.AMPERSAND);
416
- break;
417
-
418
- // String literals
419
- case '\'': this.string('\''); break;
420
- case '"': this.string('"'); break;
421
- case '`': this.backtickIdentifier(); break;
422
- case '[': this.bracketIdentifier(); break;
423
-
424
- // Blob literals
425
- case 'x':
426
- case 'X':
427
- if (this.match('\'')) {
428
- this.blobLiteral();
429
- } else {
430
- this.identifier();
431
- }
432
- break;
433
-
434
- // Whitespace
435
- case ' ':
436
- case '\r':
437
- case '\t':
438
- // Ignore whitespace
439
- break;
440
- case '\n':
441
- // Newline handling already done in advance()
442
- break;
443
-
444
- // Default - handle identifiers and numbers
445
- default:
446
- if (this.isDigit(c)) {
447
- this.number();
448
- } else if (this.isAlpha(c)) {
449
- this.identifier();
450
- } else {
451
- this.addErrorToken(`Unexpected character: ${c}`);
452
- }
453
- break;
454
- }
455
- }
456
-
457
- private advance(): string {
458
- const char = this.source.charAt(this.current);
459
- this.current++;
460
- if (char === '\n') {
461
- this.line++;
462
- this.column = 1;
463
- } else {
464
- this.column++;
465
- }
466
- return char;
467
- }
468
-
469
- private match(expected: string): boolean {
470
- if (this.isAtEnd()) return false;
471
- if (this.source.charAt(this.current) !== expected) return false;
472
-
473
- this.current++;
474
- this.column++;
475
- return true;
476
- }
477
-
478
- private peek(): string {
479
- if (this.isAtEnd()) return '\0';
480
- return this.source.charAt(this.current);
481
- }
482
-
483
- private peekNext(): string {
484
- if (this.current + 1 >= this.source.length) return '\0';
485
- return this.source.charAt(this.current + 1);
486
- }
487
-
488
- private string(quote: string): void {
489
- let value = '';
490
- let escaping = false;
491
-
492
- while ((!this.isAtEnd() && this.peek() !== quote) || escaping) {
493
- if (escaping) {
494
- // Handle escape sequences
495
- const c = this.peek();
496
- switch (c) {
497
- case 'n': value += '\n'; break;
498
- case 'r': value += '\r'; break;
499
- case 't': value += '\t'; break;
500
- case '\\': value += '\\'; break;
501
- case '\'': value += '\''; break;
502
- case '"': value += '"'; break;
503
- case '0': value += '\0'; break;
504
- default: value += c; break;
505
- }
506
- escaping = false;
507
- } else if (this.peek() === '\\') {
508
- escaping = true;
509
- } else {
510
- value += this.peek();
511
- }
512
-
513
- this.advance();
514
- }
515
-
516
- if (this.isAtEnd()) {
517
- this.addErrorToken("Unterminated string.");
518
- return;
519
- }
520
-
521
- // Consume the closing quote
522
- this.advance();
523
-
524
- // SQLite allows adjacent string literals to be concatenated
525
- if (this.peek() === quote) {
526
- this.advance(); // Consume the opening quote of the next string
527
- this.string(quote); // Process the next string
528
- // Merge the two string tokens
529
- if (this.tokens.length > 0 && this.tokens[this.tokens.length - 1].type === TokenType.STRING) {
530
- const prevToken = this.tokens.pop()!;
531
- value += prevToken.literal;
532
- }
533
- }
534
-
535
- this.addToken(TokenType.STRING, value);
536
- }
537
-
538
- private backtickIdentifier(): void {
539
- let value = '';
540
-
541
- while (!this.isAtEnd() && this.peek() !== '`') {
542
- value += this.advance();
543
- }
544
-
545
- if (this.isAtEnd()) {
546
- this.addErrorToken("Unterminated identifier.");
547
- return;
548
- }
549
-
550
- // Consume the closing backtick
551
- this.advance();
552
-
553
- this.addToken(TokenType.IDENTIFIER, value);
554
- }
555
-
556
- private bracketIdentifier(): void {
557
- let value = '';
558
-
559
- while (!this.isAtEnd() && this.peek() !== ']') {
560
- value += this.advance();
561
- }
562
-
563
- if (this.isAtEnd()) {
564
- this.addErrorToken("Unterminated identifier.");
565
- return;
566
- }
567
-
568
- // Consume the closing bracket
569
- this.advance();
570
-
571
- this.addToken(TokenType.IDENTIFIER, value);
572
- }
573
-
574
- private blobLiteral(): void {
575
- let value = '';
576
-
577
- while (!this.isAtEnd() && this.peek() !== '\'') {
578
- if (this.isHexDigit(this.peek())) {
579
- value += this.advance();
580
- } else if (this.isWhitespace(this.peek())) {
581
- this.advance(); // Skip whitespace in blob literals
582
- } else {
583
- this.addErrorToken("Invalid character in blob literal.");
584
- return;
585
- }
586
- }
587
-
588
- if (this.isAtEnd()) {
589
- this.addErrorToken("Unterminated blob literal.");
590
- return;
591
- }
592
-
593
- // Consume the closing quote
594
- this.advance();
595
-
596
- // Validate hex string length
597
- if (value.length % 2 !== 0) {
598
- this.addErrorToken("Blob literal must have an even number of hex digits.");
599
- return;
600
- }
601
-
602
- // Convert hex string to Uint8Array
603
- try {
604
- const bytes = new Uint8Array(value.length / 2);
605
- for (let i = 0; i < value.length; i += 2) {
606
- bytes[i / 2] = parseInt(value.substring(i, i + 2), 16);
607
- }
608
- this.addToken(TokenType.BLOB, bytes);
609
- } catch {
610
- this.addErrorToken("Invalid blob literal.");
611
- }
612
- }
613
-
614
- private number(): void {
615
- let isFloat = false;
616
- // Capture original lexeme starting from the first digit
617
- const start = this.start; // Use the start offset saved before scanToken called number()
618
-
619
- // Consume digits before decimal point
620
- while (this.isDigit(this.peek())) {
621
- this.advance();
622
- }
623
-
624
- // Check for decimal point
625
- if (this.peek() === '.' && this.isDigit(this.peekNext())) {
626
- isFloat = true;
627
- this.advance(); // Consume the '.'
628
-
629
- // Consume digits after decimal point
630
- while (this.isDigit(this.peek())) {
631
- this.advance();
632
- }
633
- }
634
-
635
- // Check for exponent part
636
- if (this.peek().toLowerCase() === 'e') {
637
- isFloat = true;
638
- this.advance(); // Consume the 'e' or 'E'
639
-
640
- // Optional sign
641
- if (this.peek() === '+' || this.peek() === '-') {
642
- this.advance();
643
- }
644
-
645
- // Exponent digits
646
- if (!this.isDigit(this.peek())) {
647
- this.addErrorToken("Invalid number literal: expected digits after exponent.");
648
- return;
649
- }
650
-
651
- while (this.isDigit(this.peek())) {
652
- this.advance();
653
- }
654
- }
655
-
656
- const lexeme = this.source.substring(start, this.current);
657
-
658
- if (isFloat) {
659
- // Store original string as literal for FLOAT
660
- this.addToken(TokenType.FLOAT, lexeme);
661
- } else {
662
- // For integers, parse now to handle potential BigInt
663
- try {
664
- const num = parseInt(lexeme, 10);
665
- if (!Number.isSafeInteger(num)) {
666
- try {
667
- this.addToken(TokenType.INTEGER, BigInt(lexeme));
668
- } catch {
669
- this.addErrorToken("Integer literal too large.");
670
- }
671
- } else {
672
- this.addToken(TokenType.INTEGER, num);
673
- }
674
- } catch {
675
- try {
676
- this.addToken(TokenType.INTEGER, BigInt(lexeme));
677
- } catch {
678
- this.addErrorToken("Invalid integer literal.");
679
- }
680
- }
681
- }
682
- }
683
-
684
- private identifier(): void {
685
- while (this.isAlphaNumeric(this.peek())) {
686
- this.advance();
687
- }
688
-
689
- // Check if the identifier is a keyword
690
- const text = this.source.substring(this.start, this.current).toLowerCase();
691
- const type = KEYWORDS[text] || TokenType.IDENTIFIER;
692
-
693
- this.addToken(type);
694
- }
695
-
696
- private multilineComment(): void {
697
- let nesting = 1; // Support nested comments
698
-
699
- while (nesting > 0 && !this.isAtEnd()) {
700
- if (this.peek() === '/' && this.peekNext() === '*') {
701
- this.advance(); // Consume '/'
702
- this.advance(); // Consume '*'
703
- nesting++;
704
- } else if (this.peek() === '*' && this.peekNext() === '/') {
705
- this.advance(); // Consume '*'
706
- this.advance(); // Consume '/'
707
- nesting--;
708
- } else {
709
- // Advance one character and let advance() maintain line/column
710
- this.advance();
711
- }
712
- }
713
-
714
- if (nesting > 0) {
715
- this.addErrorToken("Unterminated comment.");
716
- }
717
- }
718
-
719
- private isDigit(c: string): boolean {
720
- return c >= '0' && c <= '9';
721
- }
722
-
723
- private isHexDigit(c: string): boolean {
724
- return (c >= '0' && c <= '9') ||
725
- (c >= 'a' && c <= 'f') ||
726
- (c >= 'A' && c <= 'F');
727
- }
728
-
729
- private isAlpha(c: string): boolean {
730
- return (c >= 'a' && c <= 'z') ||
731
- (c >= 'A' && c <= 'Z') ||
732
- c === '_';
733
- }
734
-
735
- private isAlphaNumeric(c: string): boolean {
736
- return this.isAlpha(c) || this.isDigit(c);
737
- }
738
-
739
- private isWhitespace(c: string): boolean {
740
- return c === ' ' || c === '\r' || c === '\n' || c === '\t';
741
- }
742
-
743
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
744
- private addToken(type: TokenType, literal?: any): void {
745
- const lexeme = this.source.substring(this.start, this.current);
746
- this.tokens.push({
747
- type,
748
- lexeme, // Ensure lexeme is always the original string
749
- literal,
750
- startLine: this.startLine,
751
- startColumn: this.startColumn,
752
- startOffset: this.start,
753
- endLine: this.line,
754
- endColumn: this.column -1,
755
- endOffset: this.current,
756
- });
757
- }
758
-
759
- private addErrorToken(message: string): void {
760
- this.tokens.push({
761
- type: TokenType.ERROR,
762
- lexeme: message,
763
- startLine: this.line,
764
- startColumn: this.column -1,
765
- startOffset: this.current -1,
766
- endLine: this.line,
767
- endColumn: this.column -1,
768
- endOffset: this.current,
769
- });
770
- }
771
- }
1
+ export enum TokenType {
2
+ // Literals
3
+ INTEGER = 'INTEGER',
4
+ FLOAT = 'FLOAT',
5
+ STRING = 'STRING',
6
+ IDENTIFIER = 'IDENTIFIER',
7
+ BLOB = 'BLOB',
8
+
9
+ // Keywords
10
+ SELECT = 'SELECT',
11
+ FROM = 'FROM',
12
+ WHERE = 'WHERE',
13
+ INSERT = 'INSERT',
14
+ UPDATE = 'UPDATE',
15
+ DELETE = 'DELETE',
16
+ CREATE = 'CREATE',
17
+ DROP = 'DROP',
18
+ ALTER = 'ALTER',
19
+ TABLE = 'TABLE',
20
+ INDEX = 'INDEX',
21
+ VIEW = 'VIEW',
22
+ ASSERTION = 'ASSERTION',
23
+ TEMP = 'TEMP',
24
+ TEMPORARY = 'TEMPORARY',
25
+ VIRTUAL = 'VIRTUAL',
26
+ USING = 'USING',
27
+ INTO = 'INTO',
28
+ NULL = 'NULL',
29
+ TRUE = 'TRUE',
30
+ FALSE = 'FALSE',
31
+ NOT = 'NOT',
32
+ AND = 'AND',
33
+ OR = 'OR',
34
+ IN = 'IN',
35
+ LIKE = 'LIKE',
36
+ BETWEEN = 'BETWEEN',
37
+ IS = 'IS',
38
+ AS = 'AS',
39
+ DISTINCT = 'DISTINCT',
40
+ GROUP = 'GROUP',
41
+ BY = 'BY',
42
+ HAVING = 'HAVING',
43
+ ORDER = 'ORDER',
44
+ ASC = 'ASC',
45
+ DESC = 'DESC',
46
+ LIMIT = 'LIMIT',
47
+ OFFSET = 'OFFSET',
48
+ UNION = 'UNION',
49
+ INTERSECT = 'INTERSECT',
50
+ EXCEPT = 'EXCEPT',
51
+ DIFF = 'DIFF',
52
+ ALL = 'ALL',
53
+ PRIMARY = 'PRIMARY',
54
+ CONSTRAINT = 'CONSTRAINT',
55
+ GENERATED = 'GENERATED',
56
+ COLLATE = 'COLLATE',
57
+ KEY = 'KEY',
58
+ UNIQUE = 'UNIQUE',
59
+ DEFAULT = 'DEFAULT',
60
+ CHECK = 'CHECK',
61
+ FOREIGN = 'FOREIGN',
62
+ REFERENCES = 'REFERENCES',
63
+ AUTOINCREMENT = 'AUTOINCREMENT',
64
+ ON = 'ON',
65
+ CONFLICT = 'CONFLICT',
66
+ CASCADE = 'CASCADE',
67
+ RESTRICT = 'RESTRICT',
68
+ SET = 'SET',
69
+ NO = 'NO',
70
+ ACTION = 'ACTION',
71
+ RENAME = 'RENAME',
72
+ COLUMN = 'COLUMN',
73
+ TO = 'TO',
74
+ ADD = 'ADD',
75
+ ALWAYS = 'ALWAYS',
76
+ ABORT = 'ABORT',
77
+ FAIL = 'FAIL',
78
+ IGNORE = 'IGNORE',
79
+ BEGIN = 'BEGIN',
80
+ COMMIT = 'COMMIT',
81
+ ROLLBACK = 'ROLLBACK',
82
+ TRANSACTION = 'TRANSACTION',
83
+ DEFERRED = 'DEFERRED',
84
+ IMMEDIATE = 'IMMEDIATE',
85
+ JOIN = 'JOIN',
86
+ INNER = 'INNER',
87
+ LEFT = 'LEFT',
88
+ RIGHT = 'RIGHT',
89
+ FULL = 'FULL',
90
+ CROSS = 'CROSS',
91
+ OUTER = 'OUTER',
92
+ NATURAL = 'NATURAL',
93
+ REPLACE = 'REPLACE',
94
+ VALUES = 'VALUES',
95
+ EXISTS = 'EXISTS',
96
+ IF = 'IF',
97
+ DEFERRABLE = 'DEFERRABLE',
98
+ INITIALLY = 'INITIALLY',
99
+ STORED = 'STORED',
100
+ RETURNING = 'RETURNING',
101
+ SAVEPOINT = 'SAVEPOINT',
102
+ RELEASE = 'RELEASE',
103
+ PRAGMA = 'PRAGMA',
104
+ WITH = 'WITH',
105
+ RECURSIVE = 'RECURSIVE',
106
+ XOR = 'XOR',
107
+ CASE = 'CASE',
108
+ WHEN = 'WHEN',
109
+ THEN = 'THEN',
110
+ ELSE = 'ELSE',
111
+ END = 'END',
112
+ CAST = 'CAST',
113
+ OVER = 'OVER',
114
+ PARTITION = 'PARTITION',
115
+ LATERAL = 'LATERAL',
116
+ ROW = 'ROW',
117
+ ROWS = 'ROWS',
118
+ RANGE = 'RANGE',
119
+ UNBOUNDED = 'UNBOUNDED',
120
+ PRECEDING = 'PRECEDING',
121
+ FOLLOWING = 'FOLLOWING',
122
+ CURRENT = 'CURRENT',
123
+
124
+ // Declarative schema
125
+ DECLARE = 'DECLARE',
126
+ SCHEMA = 'SCHEMA',
127
+ APPLY = 'APPLY',
128
+ EXPLAIN = 'EXPLAIN',
129
+ VERSION = 'VERSION',
130
+ SEED = 'SEED',
131
+
132
+ // Operators and punctuation
133
+ PLUS = 'PLUS', // +
134
+ MINUS = 'MINUS', // -
135
+ ASTERISK = 'ASTERISK', // *
136
+ SLASH = 'SLASH', // /
137
+ PERCENT = 'PERCENT', // %
138
+ EQUAL = 'EQUAL', // =
139
+ EQUAL_EQUAL = 'EQUAL_EQUAL', // == (SQLite allows both = and ==)
140
+ NOT_EQUAL = 'NOT_EQUAL', // != or <>
141
+ LESS = 'LESS', // <
142
+ LESS_EQUAL = 'LESS_EQUAL', // <=
143
+ GREATER = 'GREATER', // >
144
+ GREATER_EQUAL = 'GREATER_EQUAL', // >=
145
+ LPAREN = 'LPAREN', // (
146
+ RPAREN = 'RPAREN', // )
147
+ COMMA = 'COMMA', // ,
148
+ DOT = 'DOT', // .
149
+ SEMICOLON = 'SEMICOLON', // ;
150
+ TILDE = 'TILDE', // ~ (for REGEXP)
151
+ PIPE = 'PIPE', // | (for concatenation or UNION)
152
+ PIPE_PIPE = 'PIPE_PIPE', // || (for concatenation)
153
+ AMPERSAND = 'AMPERSAND', // &
154
+ AMPERSAND_AMPERSAND = 'AMPERSAND_AMPERSAND', // &&
155
+ QUESTION = 'QUESTION', // ? (for parameters)
156
+ COLON = 'COLON', // : (for named parameters)
157
+ DOLLAR = 'DOLLAR', // $ (for named parameters)
158
+ ARROW = 'ARROW', // -> (JSON operator)
159
+ LBRACE = 'LBRACE', // {
160
+ RBRACE = 'RBRACE', // }
161
+
162
+ // Special
163
+ EOF = 'EOF',
164
+ ERROR = 'ERROR'
165
+ }
166
+
167
+ // Token represents a lexical token from the SQL input
168
+ export interface Token {
169
+ type: TokenType;
170
+ lexeme: string;
171
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
172
+ literal?: any;
173
+ startLine: number;
174
+ startColumn: number;
175
+ startOffset: number;
176
+ endLine: number;
177
+ endColumn: number;
178
+ endOffset: number;
179
+ }
180
+
181
+ // Reserved keywords mapping
182
+ export const KEYWORDS: Record<string, TokenType> = {
183
+ 'select': TokenType.SELECT,
184
+ 'from': TokenType.FROM,
185
+ 'where': TokenType.WHERE,
186
+ 'insert': TokenType.INSERT,
187
+ 'update': TokenType.UPDATE,
188
+ 'delete': TokenType.DELETE,
189
+ 'create': TokenType.CREATE,
190
+ 'drop': TokenType.DROP,
191
+ 'alter': TokenType.ALTER,
192
+ 'table': TokenType.TABLE,
193
+ 'index': TokenType.INDEX,
194
+ 'view': TokenType.VIEW,
195
+ 'assertion': TokenType.ASSERTION,
196
+ 'virtual': TokenType.VIRTUAL,
197
+ 'using': TokenType.USING,
198
+ 'null': TokenType.NULL,
199
+ 'true': TokenType.TRUE,
200
+ 'false': TokenType.FALSE,
201
+ 'not': TokenType.NOT,
202
+ 'and': TokenType.AND,
203
+ 'or': TokenType.OR,
204
+ 'in': TokenType.IN,
205
+ 'like': TokenType.LIKE,
206
+ 'between': TokenType.BETWEEN,
207
+ 'is': TokenType.IS,
208
+ 'as': TokenType.AS,
209
+ 'distinct': TokenType.DISTINCT,
210
+ 'group': TokenType.GROUP,
211
+ 'by': TokenType.BY,
212
+ 'having': TokenType.HAVING,
213
+ 'order': TokenType.ORDER,
214
+ 'asc': TokenType.ASC,
215
+ 'desc': TokenType.DESC,
216
+ 'limit': TokenType.LIMIT,
217
+ 'offset': TokenType.OFFSET,
218
+ 'union': TokenType.UNION,
219
+ 'diff': TokenType.DIFF,
220
+ 'all': TokenType.ALL,
221
+ 'primary': TokenType.PRIMARY,
222
+ 'constraint': TokenType.CONSTRAINT,
223
+ 'key': TokenType.KEY,
224
+ 'unique': TokenType.UNIQUE,
225
+ 'default': TokenType.DEFAULT,
226
+ 'check': TokenType.CHECK,
227
+ 'collate': TokenType.COLLATE,
228
+ 'foreign': TokenType.FOREIGN,
229
+ 'references': TokenType.REFERENCES,
230
+ 'on': TokenType.ON,
231
+ 'conflict': TokenType.CONFLICT,
232
+ 'cascade': TokenType.CASCADE,
233
+ 'restrict': TokenType.RESTRICT,
234
+ 'set': TokenType.SET,
235
+ 'autoincrement': TokenType.AUTOINCREMENT,
236
+ 'no': TokenType.NO,
237
+ 'action': TokenType.ACTION,
238
+ 'begin': TokenType.BEGIN,
239
+ 'commit': TokenType.COMMIT,
240
+ 'rollback': TokenType.ROLLBACK,
241
+ 'transaction': TokenType.TRANSACTION,
242
+ 'deferred': TokenType.DEFERRED,
243
+ 'immediate': TokenType.IMMEDIATE,
244
+ 'deferrable': TokenType.DEFERRABLE,
245
+ 'initially': TokenType.INITIALLY,
246
+ 'stored': TokenType.STORED,
247
+ 'returning': TokenType.RETURNING,
248
+ 'join': TokenType.JOIN,
249
+ 'inner': TokenType.INNER,
250
+ 'left': TokenType.LEFT,
251
+ 'right': TokenType.RIGHT,
252
+ 'full': TokenType.FULL,
253
+ 'cross': TokenType.CROSS,
254
+ 'outer': TokenType.OUTER,
255
+ 'natural': TokenType.NATURAL,
256
+ 'replace': TokenType.REPLACE,
257
+ 'values': TokenType.VALUES,
258
+ 'exists': TokenType.EXISTS,
259
+ 'if': TokenType.IF,
260
+ 'into': TokenType.INTO,
261
+ 'temp': TokenType.TEMP,
262
+ 'temporary': TokenType.TEMPORARY,
263
+ 'rename': TokenType.RENAME,
264
+ 'to': TokenType.TO,
265
+ 'add': TokenType.ADD,
266
+ 'always': TokenType.ALWAYS,
267
+ 'abort': TokenType.ABORT,
268
+ 'fail': TokenType.FAIL,
269
+ 'ignore': TokenType.IGNORE,
270
+ 'savepoint': TokenType.SAVEPOINT,
271
+ 'release': TokenType.RELEASE,
272
+ 'pragma': TokenType.PRAGMA,
273
+ 'with': TokenType.WITH,
274
+ 'recursive': TokenType.RECURSIVE,
275
+ 'xor': TokenType.XOR,
276
+ 'case': TokenType.CASE,
277
+ 'when': TokenType.WHEN,
278
+ 'then': TokenType.THEN,
279
+ 'else': TokenType.ELSE,
280
+ 'end': TokenType.END,
281
+ 'cast': TokenType.CAST,
282
+ 'over': TokenType.OVER,
283
+ 'partition': TokenType.PARTITION,
284
+ 'lateral': TokenType.LATERAL,
285
+ 'row': TokenType.ROW,
286
+ 'rows': TokenType.ROWS,
287
+ 'range': TokenType.RANGE,
288
+ 'unbounded': TokenType.UNBOUNDED,
289
+ 'preceding': TokenType.PRECEDING,
290
+ 'following': TokenType.FOLLOWING,
291
+ 'current': TokenType.CURRENT,
292
+ 'intersect': TokenType.INTERSECT,
293
+ 'except': TokenType.EXCEPT,
294
+ 'declare': TokenType.DECLARE,
295
+ // Note: schema, version, seed deliberately NOT reserved here - treated as contextual keywords
296
+ // to avoid breaking schema() function calls and column names like 'version', 'seed'
297
+ 'apply': TokenType.APPLY,
298
+ 'explain': TokenType.EXPLAIN,
299
+ };
300
+
301
/**
 * Lexer class for tokenizing SQL statements.
 *
 * The lexer walks the source string one UTF-16 code unit at a time and produces
 * a flat list of tokens terminated by an EOF token. Each token records its
 * lexeme, an optional decoded literal value, and its start/end line, column and
 * offset. Lexical problems do not throw: they are reported in-band as tokens of
 * type `TokenType.ERROR`.
 *
 * Relies on `TokenType`, `Token` and `KEYWORDS` declared elsewhere in this module.
 */
export class Lexer {
  private readonly source: string;
  private readonly tokens: Token[] = [];
  /** Offset of the first character of the token currently being scanned. */
  private start = 0;
  /** Offset of the next character to be consumed. */
  private current = 0;
  /** 1-based line of the next character to be consumed. */
  private line = 1;
  /** 1-based column of the next character to be consumed. */
  private column = 1;
  /** Line on which the token currently being scanned started. */
  private startLine = 1;
  /** Column at which the token currently being scanned started. */
  private startColumn = 1;

  constructor(source: string) {
    this.source = source;
  }

  /**
   * Scans the input and returns all tokens.
   *
   * @returns The accumulated token list; its last element is always an EOF
   *          token positioned at the end of the source.
   */
  scanTokens(): Token[] {
    while (!this.isAtEnd()) {
      // Remember where the next token begins before consuming anything.
      this.start = this.current;
      this.startLine = this.line;
      this.startColumn = this.column;
      this.scanToken();
    }

    this.tokens.push({
      type: TokenType.EOF,
      lexeme: '',
      startLine: this.line,
      startColumn: this.column,
      startOffset: this.source.length,
      endLine: this.line,
      endColumn: this.column,
      endOffset: this.source.length,
    });

    return this.tokens;
  }

  /** True once every character of the source has been consumed. */
  private isAtEnd(): boolean {
    return this.current >= this.source.length;
  }

  /**
   * Consumes one token (or one run of ignorable input such as whitespace or a
   * comment) starting at the current position.
   */
  private scanToken(): void {
    const c = this.advance();

    switch (c) {
      // Single-character tokens
      case '(': this.addToken(TokenType.LPAREN); break;
      case ')': this.addToken(TokenType.RPAREN); break;
      case '{': this.addToken(TokenType.LBRACE); break;
      case '}': this.addToken(TokenType.RBRACE); break;
      case ',': this.addToken(TokenType.COMMA); break;
      case '.': this.addToken(TokenType.DOT); break;
      case ';': this.addToken(TokenType.SEMICOLON); break;
      case '+': this.addToken(TokenType.PLUS); break;
      case '-':
        if (this.match('-')) {
          // SQL-style line comment
          this.skipLineComment();
        } else if (this.match('>')) {
          this.addToken(TokenType.ARROW);
        } else {
          this.addToken(TokenType.MINUS);
        }
        break;
      case '*': this.addToken(TokenType.ASTERISK); break;
      case '/':
        if (this.match('/')) {
          // Single line comment
          this.skipLineComment();
        } else if (this.match('*')) {
          // Multiline comment
          this.multilineComment();
        } else {
          this.addToken(TokenType.SLASH);
        }
        break;
      case '%': this.addToken(TokenType.PERCENT); break;
      case '~': this.addToken(TokenType.TILDE); break;
      case '?': this.addToken(TokenType.QUESTION); break;
      case ':': this.addToken(TokenType.COLON); break;
      case '$': this.addToken(TokenType.DOLLAR); break;

      // One or two character tokens
      case '=':
        this.addToken(this.match('=') ? TokenType.EQUAL_EQUAL : TokenType.EQUAL);
        break;
      case '!':
        // A lone '!' is not an operator; it is emitted as an ERROR token whose lexeme is '!'.
        this.addToken(this.match('=') ? TokenType.NOT_EQUAL : TokenType.ERROR);
        break;
      case '<':
        if (this.match('=')) {
          this.addToken(TokenType.LESS_EQUAL);
        } else if (this.match('>')) {
          this.addToken(TokenType.NOT_EQUAL);
        } else {
          this.addToken(TokenType.LESS);
        }
        break;
      case '>':
        this.addToken(this.match('=') ? TokenType.GREATER_EQUAL : TokenType.GREATER);
        break;
      case '|':
        this.addToken(this.match('|') ? TokenType.PIPE_PIPE : TokenType.PIPE);
        break;
      case '&':
        this.addToken(this.match('&') ? TokenType.AMPERSAND_AMPERSAND : TokenType.AMPERSAND);
        break;

      // String literals
      case '\'': this.string('\''); break;
      // Double-quoted strings are identifiers in SQL standard and SQLite
      case '"': this.doubleQuotedIdentifier(); break;
      case '`': this.backtickIdentifier(); break;
      case '[': this.bracketIdentifier(); break;

      // Blob literals: x'...' / X'...'; otherwise an ordinary identifier starting with x
      case 'x':
      case 'X':
        if (this.match('\'')) {
          this.blobLiteral();
        } else {
          this.identifier();
        }
        break;

      // Whitespace
      case ' ':
      case '\r':
      case '\t':
        // Ignore whitespace
        break;
      case '\n':
        // Newline handling already done in advance()
        break;

      // Default - handle identifiers and numbers
      default:
        if (this.isDigit(c)) {
          this.number();
        } else if (this.isAlpha(c)) {
          this.identifier();
        } else {
          this.addErrorToken(`Unexpected character: ${c}`);
        }
        break;
    }
  }

  /** Skips the remainder of the current line, leaving the newline itself unconsumed. */
  private skipLineComment(): void {
    while (this.peek() !== '\n' && !this.isAtEnd()) {
      this.advance();
    }
  }

  /**
   * Consumes and returns the next character, keeping line/column in sync:
   * a newline bumps the line and resets the column to 1, anything else bumps
   * the column.
   */
  private advance(): string {
    const char = this.source.charAt(this.current);
    this.current++;
    if (char === '\n') {
      this.line++;
      this.column = 1;
    } else {
      this.column++;
    }
    return char;
  }

  /**
   * Consumes the next character only if it equals `expected`.
   * Only ever called with non-newline characters, so just the column is advanced.
   */
  private match(expected: string): boolean {
    if (this.isAtEnd()) return false;
    if (this.source.charAt(this.current) !== expected) return false;

    this.current++;
    this.column++;
    return true;
  }

  /** Returns the next character without consuming it, or '\0' at end of input. */
  private peek(): string {
    if (this.isAtEnd()) return '\0';
    return this.source.charAt(this.current);
  }

  /** Returns the character after the next one without consuming, or '\0' if there is none. */
  private peekNext(): string {
    if (this.current + 1 >= this.source.length) return '\0';
    return this.source.charAt(this.current + 1);
  }

  /**
   * Scans a quoted string literal; the opening quote has already been consumed.
   *
   * Two escape mechanisms are recognised:
   * - backslash escapes: `\n`, `\r`, `\t`, `\\`, `\'`, `\"`, `\0`; any other
   *   escaped character stands for itself;
   * - a doubled quote character (`''`), which denotes one literal quote as in
   *   standard SQL, so `'it''s'` yields the value `it's`.
   *
   * Emits a single STRING token whose literal is the decoded value, or a single
   * "Unterminated string." error token if the input ends before the closing quote.
   *
   * @param quote The quote character that delimits the literal.
   */
  private string(quote: string): void {
    let value = '';

    while (!this.isAtEnd()) {
      const c = this.advance();

      if (c === '\\') {
        // A backslash as the very last character cannot escape anything.
        if (this.isAtEnd()) break;

        const escaped = this.advance();
        switch (escaped) {
          case 'n': value += '\n'; break;
          case 'r': value += '\r'; break;
          case 't': value += '\t'; break;
          case '\\': value += '\\'; break;
          case '\'': value += '\''; break;
          case '"': value += '"'; break;
          case '0': value += '\0'; break;
          default: value += escaped; break;
        }
      } else if (c === quote) {
        if (this.match(quote)) {
          // Doubled quote: an embedded quote character, not the end of the literal.
          value += quote;
        } else {
          this.addToken(TokenType.STRING, value);
          return;
        }
      } else {
        value += c;
      }
    }

    this.addErrorToken("Unterminated string.");
  }

  /** Scans a backtick-quoted identifier; the opening backtick has already been consumed. */
  private backtickIdentifier(): void {
    let value = '';

    while (!this.isAtEnd() && this.peek() !== '`') {
      value += this.advance();
    }

    if (this.isAtEnd()) {
      this.addErrorToken("Unterminated identifier.");
      return;
    }

    // Consume the closing backtick
    this.advance();

    this.addToken(TokenType.IDENTIFIER, value);
  }

  /**
   * Parse double-quoted identifiers.
   * In SQL standard and SQLite, double quotes delimit identifiers (not strings).
   * Supports "" escape for embedded double quotes.
   */
  private doubleQuotedIdentifier(): void {
    let value = '';

    while (!this.isAtEnd()) {
      if (this.peek() === '"') {
        // Check for escaped double quote ("")
        if (this.peekNext() === '"') {
          value += '"';
          this.advance(); // First "
          this.advance(); // Second "
        } else {
          break; // End of identifier
        }
      } else {
        value += this.advance();
      }
    }

    if (this.isAtEnd()) {
      this.addErrorToken("Unterminated identifier.");
      return;
    }

    // Consume the closing double quote
    this.advance();

    this.addToken(TokenType.IDENTIFIER, value);
  }

  /** Scans a [bracket]-quoted identifier; the opening bracket has already been consumed. */
  private bracketIdentifier(): void {
    let value = '';

    while (!this.isAtEnd() && this.peek() !== ']') {
      value += this.advance();
    }

    if (this.isAtEnd()) {
      this.addErrorToken("Unterminated identifier.");
      return;
    }

    // Consume the closing bracket
    this.advance();

    this.addToken(TokenType.IDENTIFIER, value);
  }

  /**
   * Scans a blob literal body; the leading `x'` / `X'` has already been consumed.
   * Whitespace between hex digits is skipped. On success a BLOB token is emitted
   * whose literal is a Uint8Array of the decoded bytes; otherwise an error token.
   */
  private blobLiteral(): void {
    let value = '';

    while (!this.isAtEnd() && this.peek() !== '\'') {
      if (this.isHexDigit(this.peek())) {
        value += this.advance();
      } else if (this.isWhitespace(this.peek())) {
        this.advance(); // Skip whitespace in blob literals
      } else {
        this.addErrorToken("Invalid character in blob literal.");
        return;
      }
    }

    if (this.isAtEnd()) {
      this.addErrorToken("Unterminated blob literal.");
      return;
    }

    // Consume the closing quote
    this.advance();

    // Validate hex string length
    if (value.length % 2 !== 0) {
      this.addErrorToken("Blob literal must have an even number of hex digits.");
      return;
    }

    // Convert hex string to Uint8Array
    try {
      const bytes = new Uint8Array(value.length / 2);
      for (let i = 0; i < value.length; i += 2) {
        bytes[i / 2] = parseInt(value.substring(i, i + 2), 16);
      }
      this.addToken(TokenType.BLOB, bytes);
    } catch {
      this.addErrorToken("Invalid blob literal.");
    }
  }

  /**
   * Scans a numeric literal; the first digit has already been consumed.
   *
   * A literal with a fractional part or an exponent becomes a FLOAT token whose
   * literal is the original text. Otherwise it becomes an INTEGER token whose
   * literal is a number when it is a safe integer, or a bigint when it is not.
   */
  private number(): void {
    let isFloat = false;

    // Consume digits before decimal point
    while (this.isDigit(this.peek())) {
      this.advance();
    }

    // A '.' only belongs to the number when a digit follows it
    if (this.peek() === '.' && this.isDigit(this.peekNext())) {
      isFloat = true;
      this.advance(); // Consume the '.'

      // Consume digits after decimal point
      while (this.isDigit(this.peek())) {
        this.advance();
      }
    }

    // Check for exponent part
    if (this.peek().toLowerCase() === 'e') {
      isFloat = true;
      this.advance(); // Consume the 'e' or 'E'

      // Optional sign
      if (this.peek() === '+' || this.peek() === '-') {
        this.advance();
      }

      // Exponent digits
      if (!this.isDigit(this.peek())) {
        this.addErrorToken("Invalid number literal: expected digits after exponent.");
        return;
      }

      while (this.isDigit(this.peek())) {
        this.advance();
      }
    }

    const lexeme = this.source.substring(this.start, this.current);

    if (isFloat) {
      // Store original string as literal for FLOAT
      this.addToken(TokenType.FLOAT, lexeme);
      return;
    }

    // parseInt never throws; values outside the safe-integer range are kept exact as bigint.
    const num = parseInt(lexeme, 10);
    if (Number.isSafeInteger(num)) {
      this.addToken(TokenType.INTEGER, num);
      return;
    }

    try {
      this.addToken(TokenType.INTEGER, BigInt(lexeme));
    } catch {
      this.addErrorToken("Integer literal too large.");
    }
  }

  /**
   * Scans an unquoted identifier or keyword; its first character has already
   * been consumed. Keyword matching is case-insensitive.
   */
  private identifier(): void {
    while (this.isAlphaNumeric(this.peek())) {
      this.advance();
    }

    const text = this.source.substring(this.start, this.current).toLowerCase();

    // Only own entries of KEYWORDS count: a plain object lookup would also find
    // inherited members such as 'constructor' or '__proto__' and yield a bogus token type.
    const keyword = Object.prototype.hasOwnProperty.call(KEYWORDS, text)
      ? KEYWORDS[text]
      : undefined;

    this.addToken(keyword ?? TokenType.IDENTIFIER);
  }

  /**
   * Skips a block comment; the opening slash-star has already been consumed.
   * Comments nest, and an error token is emitted if the input ends while a
   * comment is still open.
   */
  private multilineComment(): void {
    let nesting = 1; // Support nested comments

    while (nesting > 0 && !this.isAtEnd()) {
      if (this.peek() === '/' && this.peekNext() === '*') {
        this.advance(); // Consume '/'
        this.advance(); // Consume '*'
        nesting++;
      } else if (this.peek() === '*' && this.peekNext() === '/') {
        this.advance(); // Consume '*'
        this.advance(); // Consume '/'
        nesting--;
      } else {
        // Advance one character and let advance() maintain line/column
        this.advance();
      }
    }

    if (nesting > 0) {
      this.addErrorToken("Unterminated comment.");
    }
  }

  /** True for the ASCII digits 0-9. */
  private isDigit(c: string): boolean {
    return c >= '0' && c <= '9';
  }

  /** True for ASCII hexadecimal digits in either case. */
  private isHexDigit(c: string): boolean {
    return (c >= '0' && c <= '9') ||
      (c >= 'a' && c <= 'f') ||
      (c >= 'A' && c <= 'F');
  }

  /** True for ASCII letters and underscore. */
  private isAlpha(c: string): boolean {
    return (c >= 'a' && c <= 'z') ||
      (c >= 'A' && c <= 'Z') ||
      c === '_';
  }

  /** True for characters allowed after the first character of an unquoted identifier. */
  private isAlphaNumeric(c: string): boolean {
    return this.isAlpha(c) || this.isDigit(c);
  }

  /** True for space, carriage return, newline and tab. */
  private isWhitespace(c: string): boolean {
    return c === ' ' || c === '\r' || c === '\n' || c === '\t';
  }

  /**
   * Appends a token spanning from the recorded token start to the current position.
   *
   * @param type    Token type to record.
   * @param literal Optional decoded value (string contents, number, bigint, bytes, ...).
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private addToken(type: TokenType, literal?: any): void {
    const lexeme = this.source.substring(this.start, this.current);
    this.tokens.push({
      type,
      lexeme, // Ensure lexeme is always the original string
      literal,
      startLine: this.startLine,
      startColumn: this.startColumn,
      startOffset: this.start,
      endLine: this.line,
      endColumn: this.column - 1, // column already points past the last consumed character
      endOffset: this.current,
    });
  }

  /**
   * Appends an ERROR token. Its lexeme carries the message rather than source
   * text, and its position covers the most recently consumed character.
   *
   * @param message Human-readable description of the lexical error.
   */
  private addErrorToken(message: string): void {
    this.tokens.push({
      type: TokenType.ERROR,
      lexeme: message,
      startLine: this.line,
      startColumn: this.column - 1,
      startOffset: this.current - 1,
      endLine: this.line,
      endColumn: this.column - 1,
      endOffset: this.current,
    });
  }
}