squirreling 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,520 @@
1
+ /**
2
+ * @import { AggregateColumn, AggregateArg, AggregateFunc, ExprCursor, ExprNode, JoinClause, JoinType, OrderByItem, ParserState, SelectStatement, SelectColumn, StringFunc, Token, TokenType } from '../types.js'
3
+ */
4
+
5
+ import { tokenize } from './tokenize.js'
6
+ import { parseExpression, parsePrimary } from './expression.js'
7
+
// Clause keywords that must not be read as an alias following a select item
const RESERVED_AFTER_COLUMN = new Set(
  ['FROM', 'WHERE', 'GROUP', 'HAVING', 'ORDER', 'LIMIT', 'OFFSET']
)
18
+
/**
 * Tokenizes a SQL string and parses it as a single SELECT statement.
 *
 * @param {string} sql - query text
 * @returns {SelectStatement}
 * @throws {Error} if tokens other than end-of-input remain after the statement
 */
export function parseSql(sql) {
  /** @type {ParserState} */
  const state = { tokens: tokenize(sql), pos: 0 }
  const statement = parseSelectInternal(state)

  // Only the end-of-input token may follow the parsed statement
  if (current(state).type === 'eof') {
    return statement
  }
  throw parseError(state, 'end of query')
}
36
+
/**
 * Returns the token at the parser's current position without advancing.
 *
 * @param {ParserState} state
 * @returns {Token}
 */
function current(state) {
  const { tokens, pos } = state
  return tokens[pos]
}
44
+
/**
 * Looks at the token `offset` places ahead of the current position without
 * moving the parser.
 *
 * @param {ParserState} state
 * @param {number} offset - distance from the current position
 * @returns {Token}
 */
function peekToken(state, offset) {
  const { tokens, pos } = state
  const lastIdx = tokens.length - 1
  // Looking past the end yields the final token
  return tokens[Math.min(pos + offset, lastIdx)]
}
57
+
/**
 * Returns the current token and moves the parser one token forward.
 *
 * @param {ParserState} state
 * @returns {Token} the token that was current before advancing
 */
function consume(state) {
  const taken = current(state)
  const lastIdx = state.tokens.length - 1
  // The position is never advanced beyond the last token
  if (state.pos < lastIdx) {
    state.pos++
  }
  return taken
}
69
+
/**
 * Consumes the current token if it has the given type and, when a value is
 * supplied, the given value. Leaves the parser untouched otherwise.
 *
 * @param {ParserState} state
 * @param {TokenType} type
 * @param {string} [value] - only compared when it is a string
 * @returns {boolean} true if the token matched and was consumed
 */
function match(state, type, value) {
  const { type: curType, value: curValue } = current(state)
  const typeOk = curType === type
  const valueOk = typeof value !== 'string' || curValue === value
  if (!(typeOk && valueOk)) return false
  consume(state)
  return true
}
83
+
/**
 * Consumes and returns the current token, which must have exactly the given
 * type and value.
 *
 * @param {ParserState} state
 * @param {TokenType} type
 * @param {string} value - required token value; also used in the error message
 * @returns {Token}
 * @throws {Error} if the current token has a different type or value
 */
function expect(state, type, value) {
  const tok = current(state)
  if (tok.type === type && tok.value === value) {
    return consume(state)
  }
  throw parseError(state, value)
}
98
+
/**
 * Consumes and returns the current token, which must be an identifier.
 *
 * @param {ParserState} state
 * @returns {Token}
 * @throws {Error} if the current token is not an identifier
 */
function expectIdentifier(state) {
  if (current(state).type === 'identifier') {
    return consume(state)
  }
  throw parseError(state, 'identifier')
}
111
+
/**
 * Wraps a ParserState in the ExprCursor interface. Every cursor method
 * forwards to the matching parser helper, operating on the same state.
 *
 * @param {ParserState} state
 * @returns {ExprCursor}
 */
function createExprCursor(state) {
  /** @type {ExprCursor} */
  const cursor = {
    current() {
      return current(state)
    },
    peek(offset) {
      return peekToken(state, offset)
    },
    consume() {
      return consume(state)
    },
    match(type, value) {
      return match(state, type, value)
    },
    expect(type, value) {
      return expect(state, type, value)
    },
    expectIdentifier() {
      return expectIdentifier(state)
    },
  }
  return cursor
}
127
+
/**
 * Parses the column list of a SELECT: either a lone `*` or one or more
 * comma-separated select items.
 *
 * @param {ParserState} state
 * @returns {SelectColumn[]}
 */
function parseSelectList(state) {
  const first = current(state)
  const isStar = first.type === 'operator' && first.value === '*'
  if (isStar) {
    // A leading star ends the list; no further items are read after it
    consume(state)
    return [{ kind: 'star' }]
  }

  /** @type {SelectColumn[]} */
  const items = []
  do {
    items.push(parseSelectItem(state))
  } while (match(state, 'comma'))

  return items
}
150
+
/**
 * Parses one entry of the SELECT list. Recognised forms are a CAST(expr AS type)
 * call, an expression that starts with an operator token, an aggregate or
 * string function call, and a plain column reference (optionally written as
 * table.column). Each form may be followed by an alias.
 *
 * @param {ParserState} state
 * @returns {SelectColumn}
 * @throws {Error} if the item does not start with an identifier or operator,
 *   or if an identifier followed by "(" is not a supported function
 */
function parseSelectItem(state) {
  const tok = current(state)

  if (tok.type !== 'identifier' && tok.type !== 'operator') {
    throw parseError(state, 'column name or expression')
  }

  if (tok.type === 'operator') {
    // Handle SELECT expression AS alias
    const expr = parseExpression(createExprCursor(state))
    const alias = parseAs(state)
    return { kind: 'operation', expr, alias }
  }

  const upper = tok.value.toUpperCase()
  const next = peekToken(state, 1)
  const isCall = next.type === 'paren' && next.value === '('

  // Function names are matched case-insensitively, so CAST is too. A bare
  // upper-case CAST still takes this branch (and fails on the missing paren),
  // while other casings need the "(" so they can still be column names.
  if (tok.value === 'CAST' || (upper === 'CAST' && isCall)) {
    expectIdentifier(state) // consume CAST
    expect(state, 'paren', '(')
    const expr = parseExpression(createExprCursor(state))
    expect(state, 'keyword', 'AS')
    const typeTok = expectIdentifier(state)
    expect(state, 'paren', ')')
    const alias = parseAs(state)
    return {
      kind: 'operation',
      expr: { type: 'cast', expr, toType: typeTok.value },
      alias,
    }
  }

  if (isCall) {
    if (isAggregateFunc(upper)) {
      expectIdentifier(state) // consume function name
      return parseAggregateItem(state, upper)
    }
    if (isStringFunc(upper)) {
      expectIdentifier(state) // consume function name
      return parseStringFunctionItem(state, upper)
    }
    // Fail here instead of falling through to the column path, which would
    // swallow the "(" and treat the function name as a column.
    throw parseError(state, `supported function instead of "${tok.value}"`)
  }

  consume(state)
  let column = tok.value

  // Handle dot notation (table.column)
  if (current(state).type === 'dot') {
    consume(state) // consume the dot
    column = `${column}.${expectIdentifier(state).value}`
  }

  const alias = parseAs(state)

  return { kind: 'column', column, alias }
}
213
+
/**
 * Parses the parenthesised argument of an aggregate call plus an optional
 * alias. The function name itself has already been consumed by the caller.
 * The argument is either `*` or a single identifier.
 *
 * @param {ParserState} state
 * @param {AggregateFunc} func - aggregate function name
 * @returns {AggregateColumn}
 */
function parseAggregateItem(state, func) {
  expect(state, 'paren', '(')

  const first = current(state)
  const isStar = first.type === 'operator' && first.value === '*'
  if (isStar) {
    consume(state)
  }
  /** @type {AggregateArg} */
  const arg = isStar
    ? { kind: 'star' }
    : { kind: 'column', column: expectIdentifier(state).value }

  expect(state, 'paren', ')')

  return { kind: 'aggregate', func, arg, alias: parseAs(state) }
}
243
+
/**
 * Parses the parenthesised, comma-separated argument list of a string
 * function call plus an optional alias. The function name itself has already
 * been consumed by the caller. An empty argument list is allowed.
 *
 * @param {ParserState} state
 * @param {StringFunc} func - string function name
 * @returns {SelectColumn}
 */
function parseStringFunctionItem(state, func) {
  expect(state, 'paren', '(')

  /** @type {ExprNode[]} */
  const args = []

  const first = current(state)
  const isEmptyCall = first.type === 'paren' && first.value === ')'
  if (!isEmptyCall) {
    // Each argument is read as a primary expression
    const cursor = createExprCursor(state)
    do {
      args.push(parsePrimary(cursor))
    } while (match(state, 'comma'))
  }

  expect(state, 'paren', ')')

  const alias = parseAs(state)
  return { kind: 'function', func, args, alias }
}
271
+
/**
 * Parses an optional alias, written either as `AS name` or as a bare
 * identifier directly after the item.
 *
 * @param {ParserState} state
 * @returns {string | undefined} the alias, or undefined when none is present
 * @throws {Error} if AS is not followed by an identifier or a non-reserved keyword
 */
function parseAs(state) {
  if (!match(state, 'keyword', 'AS')) {
    // Implicit alias SELECT UPPER(name) name_upper
    const candidate = current(state)
    const usable =
      candidate.type === 'identifier' &&
      !RESERVED_AFTER_COLUMN.has(candidate.value.toUpperCase())
    if (!usable) return undefined
    consume(state)
    return candidate.value
  }

  // After AS, allow keywords as aliases (except reserved ones)
  const aliasTok = current(state)
  switch (aliasTok.type) {
    case 'identifier':
      consume(state)
      return aliasTok.value
    case 'keyword':
      if (!RESERVED_AFTER_COLUMN.has(aliasTok.value.toUpperCase())) {
        consume(state)
        // Use original case for keywords used as aliases
        return aliasTok.originalValue ?? aliasTok.value
      }
      break
    default:
      break
  }
  throw parseError(state, 'alias')
}
299
+
/**
 * Parses zero or more JOIN clauses of the form
 * `[INNER | LEFT [OUTER] | RIGHT [OUTER] | FULL [OUTER]] JOIN table ON condition`.
 * Stops at the first token that does not begin a join.
 *
 * @param {ParserState} state
 * @returns {JoinClause[]}
 */
function parseJoins(state) {
  /** @type {JoinClause[]} */
  const joins = []

  for (;;) {
    const lead = current(state)
    if (lead.type !== 'keyword') {
      // No more joins
      break
    }

    /** @type {JoinType} */
    let joinType = 'INNER'

    switch (lead.value) {
      case 'JOIN':
        // Just JOIN (defaults to INNER)
        consume(state)
        break
      case 'INNER':
        consume(state)
        expect(state, 'keyword', 'JOIN')
        break
      case 'LEFT':
      case 'RIGHT':
      case 'FULL':
        consume(state)
        // OUTER is optional and does not change the join type
        match(state, 'keyword', 'OUTER')
        expect(state, 'keyword', 'JOIN')
        joinType = lead.value
        break
      default:
        // Not a join keyword, stop parsing joins
        return joins
    }

    const table = expectIdentifier(state).value
    expect(state, 'keyword', 'ON')
    const on = parseExpression(createExprCursor(state))

    joins.push({ type: joinType, table, on })
  }

  return joins
}
372
+
/**
 * Reads the integer literal that follows a LIMIT or OFFSET keyword.
 *
 * @param {ParserState} state
 * @param {string} clause - clause name used in error messages ('LIMIT' or 'OFFSET')
 * @returns {number}
 * @throws {Error} if the current token is not a number or is not a whole number
 */
function parseRowCount(state, clause) {
  const tok = current(state)
  if (tok.type !== 'number') {
    throw parseError(state, `numeric ${clause}`)
  }
  consume(state)
  // Number() reads the whole literal; parseInt would silently cut "1.5" or
  // "1e3" down to 1 instead of rejecting or honouring them.
  const count = Number(tok.value)
  if (!Number.isInteger(count)) {
    throw parseError(state, `valid ${clause} value`)
  }
  return count
}

/**
 * Parses a SELECT statement in clause order: SELECT [DISTINCT] columns,
 * FROM table, joins, then optional WHERE, GROUP BY, ORDER BY, LIMIT and
 * OFFSET, and an optional trailing semicolon.
 *
 * @param {ParserState} state
 * @returns {SelectStatement}
 * @throws {Error} on any token that does not fit the grammar above
 */
function parseSelectInternal(state) {
  expect(state, 'keyword', 'SELECT')
  const distinct = match(state, 'keyword', 'DISTINCT')

  const columns = parseSelectList(state)

  expect(state, 'keyword', 'FROM')
  const from = expectIdentifier(state).value // table name

  // Parse JOIN clauses
  const joins = parseJoins(state)

  const cursor = createExprCursor(state)

  /** @type {ExprNode | undefined} */
  let where
  if (match(state, 'keyword', 'WHERE')) {
    where = parseExpression(cursor)
  }

  /** @type {ExprNode[]} */
  const groupBy = []
  if (match(state, 'keyword', 'GROUP')) {
    expect(state, 'keyword', 'BY')
    do {
      groupBy.push(parseExpression(cursor))
    } while (match(state, 'comma'))
  }

  /** @type {OrderByItem[]} */
  const orderBy = []
  if (match(state, 'keyword', 'ORDER')) {
    expect(state, 'keyword', 'BY')
    do {
      const expr = parseExpression(cursor)
      /** @type {'ASC' | 'DESC'} */
      let direction = 'ASC'
      // An explicit ASC is consumed but leaves the default in place
      if (!match(state, 'keyword', 'ASC') && match(state, 'keyword', 'DESC')) {
        direction = 'DESC'
      }
      orderBy.push({ expr, direction })
    } while (match(state, 'comma'))
  }

  // LIMIT may be followed by OFFSET, and OFFSET may appear on its own
  /** @type {number | undefined} */
  let limit
  if (match(state, 'keyword', 'LIMIT')) {
    limit = parseRowCount(state, 'LIMIT')
  }
  /** @type {number | undefined} */
  let offset
  if (match(state, 'keyword', 'OFFSET')) {
    offset = parseRowCount(state, 'OFFSET')
  }

  // optional trailing semicolon
  match(state, 'semicolon')

  return {
    distinct,
    columns,
    from,
    joins,
    where,
    groupBy,
    orderBy,
    limit,
    offset,
  }
}
492
+
/**
 * Builds (but does not throw) a parser error with a uniform message that
 * names what was expected, the previous token if there is one, and the
 * position of the current token.
 *
 * @param {ParserState} state
 * @param {string} expected - Description of what was expected
 * @returns {Error}
 */
function parseError(state, expected) {
  const { position } = current(state)
  const prev = state.tokens[state.pos - 1]
  let context = ''
  if (prev) {
    // Prefer the token's original spelling when it carries one
    context = ` after "${prev.originalValue ?? prev.value}"`
  }
  return new Error(`Expected ${expected}${context} at position ${position}`)
}
505
+
// Aggregate function names, compared exactly as given (upper case)
const AGGREGATE_FUNCS = new Set(['COUNT', 'SUM', 'AVG', 'MIN', 'MAX'])

/**
 * Tells whether a name is one of the supported aggregate functions.
 * The comparison is case-sensitive.
 *
 * @param {string} name
 * @returns {name is AggregateFunc}
 */
function isAggregateFunc(name) {
  return AGGREGATE_FUNCS.has(name)
}
513
+
// String function names, compared exactly as given (upper case)
const STRING_FUNCS = new Set(['UPPER', 'LOWER', 'CONCAT', 'LENGTH', 'SUBSTRING', 'TRIM'])

/**
 * Tells whether a name is one of the supported string functions.
 * The comparison is case-sensitive.
 *
 * @param {string} name
 * @returns {name is StringFunc}
 */
function isStringFunc(name) {
  return STRING_FUNCS.has(name)
}